#Partisan Stability During Turbulent Times: Institute for the Study of Citizens and Politics (ISCAP) data analysis
#Donald P. Green and Paul B. Platzman
#May 2021

#NOTE TO USER: always run all code from the start of the document up to the section header that reads "####Creating data set [SUBJECTS WITH ANY NUMBER OF MISSING VALUES]" (line 252 of the original script). That code reads in and processes the data in preparation for analysis. After that header, any code chunk introduced by four hashtags can be run on its own.

#Data for this script can be found at the following URL: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/CYISG1. After the data is downloaded, it can be imported in the following manner:

####Loading data
#Each load() call restores the objects saved in one wave's .Rdata file into the
#workspace (presumably the wave6 ... wave15 data frames used below).
wave_release_files <- c(
  "./wave6-forrelease-02052021.Rdata",
  "./wave7-forrelease-02052021.Rdata",
  "./wave8-forrelease-02052021.Rdata",
  "./wave9-forrelease-02052021.Rdata",
  "./wave10-forrelease-02052021.Rdata",
  "./wave11-forrelease-02052021.Rdata",
  "./wave12-forrelease-02052021.Rdata",
  "./wave13-forrelease-02052021.Rdata",
  "./wave14-forrelease-02212021.Rdata",
  "./wave15-forrelease-02212021.Rdata"
)
for (wave_release_file in wave_release_files) {
  load(wave_release_file)
}

#Correcting for dataset error
#Keep only the wave 10 rows that have a recorded tm_start value
wave10 <- wave10[!is.na(wave10$tm_start), , drop = FALSE]



####Defining functions
#Maps one party-identification label to its numeric score.
#  x: a single value (character or factor) holding a PID3 or PID7 label.
#Returns a numeric score (negative = Democratic, positive = Republican), or NA
#when x is missing or is not one of the labels listed below.
PID <- function(x) {
  #Missing responses stay missing
  if (is.na(x)) {
    return(NA)
  }
  pid_scores <- c(
    #PID3
    "Democrat" = -1,
    "Republican" = 1,
    "Independent" = 0,
    #PID7
    "Strong Democrat" = -3,
    "Not Strong Democrat" = -2,
    "Leans Democrat" = -1,
    "Undecided/Independent/Other" = 0,
    "Leans Republican" = 1,
    "Not Strong Republican" = 2,
    "Strong Republican" = 3
  )
  response <- as.character(x)
  if (response %in% names(pid_scores)) {
    return(pid_scores[[response]])
  }
  #Any other label is treated as missing
  NA
}

#Recodes every cell of a data frame of party-identification labels with PID().
#  df: data frame whose columns hold PID3/PID7 labels (character or factor).
#Returns df with each column replaced by the numeric PID() scores; labels that
#PID() does not recognise, and missing values, become NA.
#The columns come back as numeric vectors (the earlier cell-by-cell version
#stored the scores as text when the column was character), so a following call
#to convert_to_numeric_2() is harmless but no longer required.
convert_to_numeric <- function(df) {
  #seq_along() keeps the loop empty for a zero-column frame, and vapply()
  #returns numeric(0) for a zero-row column instead of touching row 1
  for (j in seq_along(df)) {
    df[[j]] <- vapply(
      df[[j]],
      function(cell) as.numeric(PID(cell)),
      numeric(1),
      USE.NAMES = FALSE
    )
  }
  df
}

#Coerces every column of a data frame to numeric with as.numeric().
#  df: data frame whose columns can be coerced (e.g. text such as "-1").
#Returns df with all columns numeric; values that cannot be parsed become NA
#(with the usual coercion warning).
convert_to_numeric_2 <- function(df) {
  #Whole-column coercion gives a numeric vector even for zero-row input,
  #where sapply() would have returned an empty list
  for (i in seq_along(df)) {
    df[[i]] <- as.numeric(df[[i]])
  }
  df
}

#Converts household-income bracket labels to a dollar value.
#  x: a bracket label, or a vector of labels (character or factor).
#Returns a numeric vector the same length as x holding the midpoint of each
#bracket; the two open-ended brackets use the fixed values listed below.
#Missing or unrecognised labels return NA.
income <- function(x) {
  bracket_midpoints <- c(
    "Less than $5,000" = 2500,
    "$5,000 to $7,499" = 6250,
    "$7,500 to $9,999" = 8750,
    "$10,000 to $12,499" = 11250,
    "$12,500 to $14,999" = 13750,
    #Midpoint of 15,000-19,999 (previously mistyped as 15750)
    "$15,000 to $19,999" = 17500,
    "$20,000 to $24,999" = 22500,
    "$25,000 to $29,999" = 27500,
    "$30,000 to $34,999" = 32500,
    "$35,000 to $39,999" = 37500,
    "$40,000 to $49,999" = 45000,
    "$50,000 to $59,999" = 55000,
    "$60,000 to $74,999" = 67500,
    "$75,000 to $84,999" = 80000,
    "$85,000 to $99,999" = 92500,
    "$100,000 to $124,999" = 112500,
    "$125,000 to $149,999" = 137500,
    "$150,000 to $174,999" = 162500,
    "$175,000 or more" = 185000
  )
  #Name lookup is exact; NA and unknown labels index to NA
  unname(bracket_midpoints[as.character(x)])
}

#Replaces every cell of a data frame with a missingness indicator.
#  df: any data frame.
#Returns df with each column replaced by a numeric vector that is 1 where the
#original value was NA and 0 otherwise. Works column by column, so empty data
#frames are returned unchanged in shape.
missingness_as_category <- function(df) {
  for (j in seq_along(df)) {
    df[[j]] <- as.numeric(is.na(df[[j]]))
  }
  df
}

#Implied R^2 between two adjacent waves after removing estimated error variance.
#  df:         data frame (or matrix) with one numeric column per wave.
#  first_wave: column index of the first wave of the pair; columns
#              first_wave-1 through first_wave+2 are all used.
#Returns cov(y1,y2)^2 divided by the product of the two error-free variances,
#where each error variance is estimated from covariances with the
#neighbouring waves.
wiley_r_squared <- function(df, first_wave) {
  #The pair of interest plus the wave on either side
  before <- df[, first_wave - 1]
  left <- df[, first_wave]
  right <- df[, first_wave + 1]
  after <- df[, first_wave + 2]

  cov_pair <- cov(left, right)
  var_left <- var(left)
  var_right <- var(right)

  #Error variance of each wave in the pair
  error_left <- var_left - (cov_pair * (cov(before, left) / cov(before, right)))
  error_right <- var_right - (cov(right, after) * (cov_pair / cov(left, after)))

  #Squared covariance over the product of the error-free variances
  cov_pair^2 / ((var_left - error_left) * (var_right - error_right))
}


####Creating and renaming columns
##PID7
#Builds the seven-point label from a wave's branching items: Q7 is the party
#stem, Q9 / Q8 the Democratic / Republican strength follow-ups, and Q10 the
#leaning follow-up for Independents. Combinations not listed below yield NA.
pid7_from_branching_items <- function(wave) {
  ifelse(
    wave$Q7 == "Democrat" & wave$Q9 == "Strong Democrat",
    "Strong Democrat",
    ifelse(
      wave$Q7 == "Democrat" & wave$Q9 == "Not very strong Democrat",
      "Not Strong Democrat",
      ifelse(
        wave$Q7 == "Independent" & wave$Q10 == "Democratic Party",
        "Leans Democrat",
        ifelse(
          wave$Q7 == "Independent" & wave$Q10 == "Republican Party",
          "Leans Republican",
          ifelse(
            wave$Q7 == "Republican" & wave$Q8 == "Strong Republican",
            "Strong Republican",
            ifelse(
              wave$Q7 == "Republican" & wave$Q8 == "Not very strong Republican",
              "Not Strong Republican",
              ifelse(wave$Q7 == "Independent", "Undecided/Independent/Other", NA)
            )
          )
        )
      )
    )
  )
}

#Each cross-tab of the stem against the new label is printed as a check
wave6$PID7_w6 <- pid7_from_branching_items(wave6)
table(wave6$Q7, wave6$PID7_w6)

wave7$PID7_w7 <- pid7_from_branching_items(wave7)
table(wave7$Q7, wave7$PID7_w7)

wave12$PID7_w12 <- pid7_from_branching_items(wave12)
table(wave12$Q7, wave12$PID7_w12)

wave14$PID7_w14 <- pid7_from_branching_items(wave14)
table(wave14$Q7, wave14$PID7_w14)

wave15$PID7_w15 <- pid7_from_branching_items(wave15)
table(wave15$Q7, wave15$PID7_w15)

#Waves 10, 11, 13 and 14 carry a ready-made seven-point column; only its name
#(and its capitalisation) differs by wave
names(wave10)[names(wave10) == "xParty7"] <- "PID7_w10_pre"
names(wave11)[names(wave11) == "xParty7"] <- "PID7_w11_pre"
names(wave13)[names(wave13) == "XPARTY7"] <- "PID7_w13"
names(wave14)[names(wave14) == "xparty7"] <- "PID7_w14_pre"

##PID3
#In the waves with branching items, the party stem Q7 is the three-point measure
names(wave6)[names(wave6) == "Q7"] <- "PID3_w6"
names(wave7)[names(wave7) == "Q7"] <- "PID3_w7"
names(wave12)[names(wave12) == "Q7"] <- "PID3_w12"
names(wave14)[names(wave14) == "Q7"] <- "PID3_w14"
names(wave15)[names(wave15) == "Q7"] <- "PID3_w15"

#Collapses seven-point labels to three points: strong / not strong identifiers
#keep their party, leaners and "Undecided/Independent/Other" become
#"Independent", and any other value (including NA) yields NA.
pid3_from_pid7 <- function(pid7) {
  ifelse(
    pid7 == "Strong Democrat" | pid7 == "Not Strong Democrat",
    "Democrat",
    ifelse(
      pid7 == "Strong Republican" | pid7 == "Not Strong Republican",
      "Republican",
      ifelse(
        pid7 == "Undecided/Independent/Other" | pid7 == "Leans Democrat" | pid7 == "Leans Republican",
        "Independent",
        NA
      )
    )
  )
}

#Each cross-tab of the seven-point against the three-point label is printed as a check
wave10$PID3_w10_pre <- pid3_from_pid7(wave10$PID7_w10_pre)
table(wave10$PID7_w10_pre, wave10$PID3_w10_pre)

wave11$PID3_w11_pre <- pid3_from_pid7(wave11$PID7_w11_pre)
table(wave11$PID7_w11_pre, wave11$PID3_w11_pre)

wave13$PID3_w13 <- pid3_from_pid7(wave13$PID7_w13)
table(wave13$PID7_w13, wave13$PID3_w13)

wave14$PID3_w14_pre <- pid3_from_pid7(wave14$PID7_w14_pre)
table(wave14$PID7_w14_pre, wave14$PID3_w14_pre)

##PPPADATE
#Give each wave's pppadate column a wave-specific name so the columns stay
#distinct after the waves are merged
names(wave10)[names(wave10) == 'pppadate'] <- 'pppadate_w10'
names(wave11)[names(wave11) == 'pppadate'] <- 'pppadate_w11'
names(wave14)[names(wave14) == 'pppadate'] <- 'pppadate_w14'

##Reassigning values collected outside date range as NAs
#For each wave: (1) print the distribution before the change, (2) blank the
#PID7 value when pppadate (compared as a yyyymmdd number) is outside the window,
#(3) map the remaining values back to labels and re-factor, (4) print again.
#NOTE(review): step 2 uses ifelse(), which drops factor attributes, so the kept
#values are presumably the integer codes of a factor column; step 3 relies on
#that coding and on the level order in each wave's file -- confirm against the data.

#Wave 10 window: 20150728 to 20150903. Codes 1-7 run from "Strong Republican"
#to "Strong Democrat"; any other code becomes NA.
summary(wave10$PID7_w10_pre)
wave10$PID7_w10_pre = ifelse(wave10$pppadate_w10<20150728|wave10$pppadate_w10>20150903,NA,wave10$PID7_w10_pre)
wave10$PID7_w10_pre = as.factor(ifelse(wave10$PID7_w10_pre==1,"Strong Republican",ifelse(wave10$PID7_w10_pre==2,"Not Strong Republican",ifelse(wave10$PID7_w10_pre==3,"Leans Republican",ifelse(wave10$PID7_w10_pre==4,"Undecided/Independent/Other",ifelse(wave10$PID7_w10_pre==5,"Leans Democrat",ifelse(wave10$PID7_w10_pre==6,"Not Strong Democrat",ifelse(wave10$PID7_w10_pre==7,"Strong Democrat",NA))))))))
summary(wave10$PID7_w10_pre)

#Wave 11 window: 20160715 to 20160827. Codes 1 and 2 become NA, codes 3-9 run
#from "Strong Republican" to "Strong Democrat"; any other non-missing code
#becomes "Other".
summary(wave11$PID7_w11_pre)
wave11$PID7_w11_pre = ifelse(wave11$pppadate_w11<20160715|wave11$pppadate_w11>20160827,NA,wave11$PID7_w11_pre)
wave11$PID7_w11_pre = as.factor(ifelse(wave11$PID7_w11_pre==1|wave11$PID7_w11_pre==2,NA,ifelse(wave11$PID7_w11_pre==3,"Strong Republican",ifelse(wave11$PID7_w11_pre==4,"Not Strong Republican",ifelse(wave11$PID7_w11_pre==5,"Leans Republican",ifelse(wave11$PID7_w11_pre==6,"Undecided/Independent/Other",ifelse(wave11$PID7_w11_pre==7,"Leans Democrat",ifelse(wave11$PID7_w11_pre==8,"Not Strong Democrat",ifelse(wave11$PID7_w11_pre==9,"Strong Democrat","Other")))))))))
summary(wave11$PID7_w11_pre)

#Wave 14 window: 20190611 to 20190820. Code 1 becomes NA, codes 2-8 run from
#"Strong Republican" to "Strong Democrat"; any other non-missing code becomes
#"Other".
summary(wave14$PID7_w14_pre)
wave14$PID7_w14_pre = ifelse(wave14$pppadate_w14<20190611|wave14$pppadate_w14>20190820,NA,wave14$PID7_w14_pre)
wave14$PID7_w14_pre = as.factor(ifelse(wave14$PID7_w14_pre==1,NA,ifelse(wave14$PID7_w14_pre==2,"Strong Republican",ifelse(wave14$PID7_w14_pre==3,"Not Strong Republican",ifelse(wave14$PID7_w14_pre==4,"Leans Republican",ifelse(wave14$PID7_w14_pre==5,"Undecided/Independent/Other",ifelse(wave14$PID7_w14_pre==6,"Leans Democrat",ifelse(wave14$PID7_w14_pre==7,"Not Strong Democrat",ifelse(wave14$PID7_w14_pre==8,"Strong Democrat","Other")))))))))
summary(wave14$PID7_w14_pre)

#Apply the same date windows to the three-point measures (already character
#labels, so no re-labelling step is needed)
wave10$PID3_w10_pre = ifelse(wave10$pppadate_w10<20150728|wave10$pppadate_w10>20150903,NA,wave10$PID3_w10_pre)
wave11$PID3_w11_pre = ifelse(wave11$pppadate_w11<20160715|wave11$pppadate_w11>20160827,NA,wave11$PID3_w11_pre)
wave14$PID3_w14_pre = ifelse(wave14$pppadate_w14<20190611|wave14$pppadate_w14>20190820,NA,wave14$PID3_w14_pre)



####Creating data set [SUBJECTS WITH ANY NUMBER OF MISSING VALUES]
##Assign a minimum number of missing values each row must have in order to remain in the data set. 0 is equivalent to all cases. 9 is the maximum value applicable.
missing_value_minimum <- 0

##Proposed final nine measures OUTER JOIN
#all = TRUE keeps respondents who appear in either input; waves 6-12 are keyed
#on "MNO" and waves 13-15 on "mno"
waves67 <- merge(x = wave6, y = wave7, by = "MNO", all = TRUE)
waves6710 <- merge(x = waves67, y = wave10, by = "MNO", all = TRUE)
waves6710_11 <- merge(x = waves6710, y = wave11, by = "MNO", all = TRUE)
waves6710_12 <- merge(x = waves6710_11, y = wave12, by = "MNO", all = TRUE)
waves6710_13 <- merge(x = waves6710_12, y = wave13, by.x = "MNO", by.y = "mno", all = TRUE)
waves6710_14 <- merge(x = waves6710_13, y = wave14, by.x = "MNO", by.y = "mno", all = TRUE)
waves6710_15 <- merge(x = waves6710_14, y = wave15, by.x = "MNO", by.y = "mno", all = TRUE)

##Filtering by columns
#Keep the nine PID7 measures, then store the listed columns as plain text
all_waves_PID7 <- waves6710_15[, c("PID7_w6", "PID7_w7", "PID7_w10_pre", "PID7_w11_pre", "PID7_w12", "PID7_w13", "PID7_w14_pre", "PID7_w14", "PID7_w15")]
total_pid7_text_columns <- c("PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")
all_waves_PID7[total_pid7_text_columns] <- lapply(all_waves_PID7[total_pid7_text_columns], as.character)

#Same for the nine PID3 measures
all_waves_PID3 <- waves6710_15[, c("PID3_w6", "PID3_w7", "PID3_w10_pre", "PID3_w11_pre", "PID3_w12", "PID3_w13", "PID3_w14_pre", "PID3_w14", "PID3_w15")]
total_pid3_text_columns <- c("PID3_w6", "PID3_w7", "PID3_w12", "PID3_w13", "PID3_w14", "PID3_w15")
all_waves_PID3[total_pid3_text_columns] <- lapply(all_waves_PID3[total_pid3_text_columns], as.character)

##Converting categorical values to numeric
#First pass recodes labels to scores, second pass makes every column numeric
all_waves_PID7 <- convert_to_numeric_2(convert_to_numeric(all_waves_PID7))
all_waves_PID3 <- convert_to_numeric_2(convert_to_numeric(all_waves_PID3))

##Filtering by rows
#Removing if too few NAs
total_pid7_missing_count <- rowSums(is.na(all_waves_PID7))
all_waves_PID7 <- all_waves_PID7[total_pid7_missing_count >= missing_value_minimum, ]
total_pid3_missing_count <- rowSums(is.na(all_waves_PID3))
all_waves_PID3 <- all_waves_PID3[total_pid3_missing_count >= missing_value_minimum, ]

summary(rowSums(is.na(all_waves_PID7))) #Checking that the correct type of cases remain
summary(rowSums(is.na(all_waves_PID3))) #Checking that the correct type of cases remain

##Computing statistics
#Correlations
#Each coefficient uses every respondent observed on both measures of the pair
cor_PID7 <- round(cor(all_waves_PID7, use = "pairwise.complete.obs"), digits = 3)
cor_PID3 <- round(cor(all_waves_PID3, use = "pairwise.complete.obs"), digits = 3)

#PID7: blank the diagonal and everything below it, print, and export
lower.tri(cor_PID7, diag = FALSE)
upper.tri(cor_PID7, diag = FALSE)
lower_7 <- cor_PID7
lower_7[row(lower_7) >= col(lower_7)] <- ""
lower_7 <- as.data.frame(lower_7)
lower_7
write.csv(lower_7, file = "./total_cases_PID7_correlations.csv")

#PID3: same layout
lower.tri(cor_PID3, diag = FALSE)
upper.tri(cor_PID3, diag = FALSE)
lower_3 <- cor_PID3
lower_3[row(lower_3) >= col(lower_3)] <- ""
lower_3 <- as.data.frame(lower_3)
lower_3
write.csv(lower_3, file = "./total_cases_PID3_correlations.csv")

#Counting the number of observations in each cell
library(psych)
cell_counts_PID7 <- pairwiseCount(all_waves_PID7, diagonal = FALSE) #Highest cell count is wave6-wave7 with 2058. Lowest cell count is wave12-wave14pre with 592.
cell_counts_PID3 <- pairwiseCount(all_waves_PID3, diagonal = FALSE) #Highest cell count is wave6-wave7 with 2066. Lowest cell count is wave12-wave14pre with 595.

write.csv(cell_counts_PID7, file = "./cell_counts_PID7_correlations.csv")
write.csv(cell_counts_PID3, file = "./cell_counts_PID3_correlations.csv")

#Means and SDs (Individuals)
#Row-wise mean and standard deviation over columns 1:9 (the nine PID7 measures
#selected when all_waves_PID7 was built), ignoring missing waves. sd() returns
#NA for a respondent with fewer than two observed waves.
all_waves_PID7$Mean_PID7 = apply(all_waves_PID7[,1:9], 1, mean, na.rm = TRUE)
all_waves_PID7$SD_PID7 = apply(all_waves_PID7[,1:9], 1, sd, na.rm = TRUE)

summary(all_waves_PID7$Mean_PID7)
summary(all_waves_PID7$SD_PID7)

#Same two respondent-level summaries for the nine PID3 measures
all_waves_PID3$Mean_PID3 = apply(all_waves_PID3[,1:9], 1, mean, na.rm = TRUE)
all_waves_PID3$SD_PID3 = apply(all_waves_PID3[,1:9], 1, sd, na.rm = TRUE)

summary(all_waves_PID3$Mean_PID3)
summary(all_waves_PID3$SD_PID3)

#Means and SDs (Waves)
#Column-wise mean and SD, rounded to two decimals and printed. The data frames
#now also contain the Mean_/SD_ columns added above, so the printout has two
#extra rows after the nine wave measures.
as.data.frame(round(apply(all_waves_PID7, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(all_waves_PID7, 2, sd, na.rm=TRUE),2))

as.data.frame(round(apply(all_waves_PID3, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(all_waves_PID3, 2, sd, na.rm=TRUE),2))



####Creating data set [COMPLETE CASES ONLY]
##Proposed final nine measures INNER JOIN
#Without all = TRUE, merge() keeps only respondents present in both inputs
waves67 <- merge(x = wave6, y = wave7, by = "MNO")
waves6710 <- merge(x = waves67, y = wave10, by = "MNO")
waves6710_11 <- merge(x = waves6710, y = wave11, by = "MNO")
waves6710_12 <- merge(x = waves6710_11, y = wave12, by = "MNO")
waves6710_13 <- merge(x = waves6710_12, y = wave13, by.x = "MNO", by.y = "mno")
waves6710_14 <- merge(x = waves6710_13, y = wave14, by.x = "MNO", by.y = "mno")
waves6710_15 <- merge(x = waves6710_14, y = wave15, by.x = "MNO", by.y = "mno")

##Filtering by rows
#Removing if PID3 value is not applicable
complete_pid3_labels <- c("Democrat", "Independent", "Republican")
for (complete_pid3_column in c("PID3_w6", "PID3_w7", "PID3_w12", "PID3_w14", "PID3_w15",
                               "PID3_w10_pre", "PID3_w11_pre", "PID3_w13", "PID3_w14_pre")) {
  waves6710_15 <- waves6710_15[waves6710_15[[complete_pid3_column]] %in% complete_pid3_labels, ]
}

#Removing if PID7 value is not applicable
complete_pid7_labels <- c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican")
for (complete_pid7_column in c("PID7_w6", "PID7_w7", "PID7_w12", "PID7_w14", "PID7_w15",
                               "PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")) {
  waves6710_15 <- waves6710_15[waves6710_15[[complete_pid7_column]] %in% complete_pid7_labels, ]
}

#Removing if pre-survey partisanship measure is not within the defined date range
complete_w10_in_window <- waves6710_15$pppadate_w10 >= 20150728 & waves6710_15$pppadate_w10 <= 20150903
waves6710_15 <- waves6710_15[complete_w10_in_window, ] #The cut points could be extended to include more subjects
complete_w11_in_window <- waves6710_15$pppadate_w11 >= 20160715 & waves6710_15$pppadate_w11 <= 20160827
waves6710_15 <- waves6710_15[complete_w11_in_window, ] #The cut points could be extended to include more subjects
complete_w14_in_window <- waves6710_15$pppadate_w14 >= 20190611 & waves6710_15$pppadate_w14 <= 20190820
waves6710_15 <- waves6710_15[complete_w14_in_window, ] #The cut points could be extended to include more subjects

##Filtering by columns
#Keep the nine PID7 measures, then store the listed columns as plain text
all_waves_PID7 <- waves6710_15[, c("PID7_w6", "PID7_w7", "PID7_w10_pre", "PID7_w11_pre", "PID7_w12", "PID7_w13", "PID7_w14_pre", "PID7_w14", "PID7_w15")]
complete_pid7_text_columns <- c("PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")
all_waves_PID7[complete_pid7_text_columns] <- lapply(all_waves_PID7[complete_pid7_text_columns], as.character)

#Same for the nine PID3 measures
all_waves_PID3 <- waves6710_15[, c("PID3_w6", "PID3_w7", "PID3_w10_pre", "PID3_w11_pre", "PID3_w12", "PID3_w13", "PID3_w14_pre", "PID3_w14", "PID3_w15")]
complete_pid3_text_columns <- c("PID3_w6", "PID3_w7", "PID3_w12", "PID3_w13", "PID3_w14", "PID3_w15")
all_waves_PID3[complete_pid3_text_columns] <- lapply(all_waves_PID3[complete_pid3_text_columns], as.character)

##Converting categorical values to numeric
#First pass recodes labels to scores, second pass makes every column numeric
all_waves_PID7 <- convert_to_numeric_2(convert_to_numeric(all_waves_PID7))
all_waves_PID3 <- convert_to_numeric_2(convert_to_numeric(all_waves_PID3))

##Computing statistics
#Correlations
#No missing-data option is passed: the complete-case frames are used as they are
cor_PID7_listwise <- round(cor(all_waves_PID7), digits = 3)
cor_PID3_listwise <- round(cor(all_waves_PID3), digits = 3)

#PID7: blank the diagonal and everything below it, print, and export
lower.tri(cor_PID7_listwise, diag = FALSE)
upper.tri(cor_PID7_listwise, diag = FALSE)
lower_7_listwise <- cor_PID7_listwise
lower_7_listwise[row(lower_7_listwise) >= col(lower_7_listwise)] <- ""
lower_7_listwise <- as.data.frame(lower_7_listwise)
lower_7_listwise
write.csv(lower_7_listwise, file = "./complete_cases_PID7_correlations.csv")

#PID3: same layout
lower.tri(cor_PID3_listwise, diag = FALSE)
upper.tri(cor_PID3_listwise, diag = FALSE)
lower_3_listwise <- cor_PID3_listwise
lower_3_listwise[row(lower_3_listwise) >= col(lower_3_listwise)] <- ""
lower_3_listwise <- as.data.frame(lower_3_listwise)
lower_3_listwise
write.csv(lower_3_listwise, file = "./complete_cases_PID3_alternative_correlations.csv")

#Print the number of observations behind each pair of measures
library(psych)
pairwiseCount(all_waves_PID7)
pairwiseCount(all_waves_PID3)

######
#To create the final correlation matrices for presentation, combine the pairwise correlations for the total cases (missing_value_minimum = 0) with the correlations for the complete case dataframes (which is equivalent to listwise deletion) to match the four matrices you have in the "Total Cases N=2606" tab and the "Complete Cases N=365" tab
#Requires cor_PID7 / cor_PID3 (total cases) and cor_PID7_listwise /
#cor_PID3_listwise (complete cases) to exist in the workspace.

#PID7 Correlation Matrix (pairwise is lower diagonal, listwise is upper diagonal)
lower_7_listwise <- cor_PID7_listwise
lower_7 <- cor_PID7

#Overlay the pairwise coefficients on the cells strictly below the diagonal
below_diagonal_7 <- lower.tri(lower_7_listwise, diag = FALSE)
lower_7_listwise[below_diagonal_7] <- lower_7[below_diagonal_7]
#Mark the diagonal itself with "**". Replacing the diagonal by position (rather
#than every cell equal to 1) leaves an off-diagonal coefficient that rounds to
#1.000 visible and does not fail on an NA coefficient. The assignment turns the
#matrix into text, as in the exported file.
diag(lower_7_listwise) <- "**"

lower_7_listwise <- as.data.frame(lower_7_listwise)
lower_7_listwise
write.csv(lower_7_listwise, "./final_PID7_correlations.csv")

#PID3 Correlation Matrix (pairwise is lower diagonal, listwise is upper diagonal)
lower_3_listwise <- cor_PID3_listwise
lower_3 <- cor_PID3

#Same overlay and diagonal marker as for PID7
below_diagonal_3 <- lower.tri(lower_3_listwise, diag = FALSE)
lower_3_listwise[below_diagonal_3] <- lower_3[below_diagonal_3]
diag(lower_3_listwise) <- "**"

lower_3_listwise <- as.data.frame(lower_3_listwise)
lower_3_listwise
write.csv(lower_3_listwise, "./final_PID3_correlations.csv")
######

#Means and SDs (Individuals)
#Row-wise mean and standard deviation over columns 1:9 (the nine PID7 measures
#selected when all_waves_PID7 was built). No na.rm here: a missing value in any
#wave would make the respondent's mean and SD NA.
all_waves_PID7$Mean_PID7 = apply(all_waves_PID7[,1:9], 1, mean)
all_waves_PID7$SD_PID7 = apply(all_waves_PID7[,1:9], 1, sd)

summary(all_waves_PID7$Mean_PID7)
summary(all_waves_PID7$SD_PID7)

#Same two respondent-level summaries for the nine PID3 measures
all_waves_PID3$Mean_PID3 = apply(all_waves_PID3[,1:9], 1, mean)
all_waves_PID3$SD_PID3 = apply(all_waves_PID3[,1:9], 1, sd)

summary(all_waves_PID3$Mean_PID3)
summary(all_waves_PID3$SD_PID3)

#Means and SDs (Waves)
#Column-wise mean and SD, rounded to two decimals and printed. The data frames
#now also contain the Mean_/SD_ columns added above, so the printout has two
#extra rows after the nine wave measures.
as.data.frame(round(apply(all_waves_PID7, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(all_waves_PID7, 2, sd, na.rm=TRUE),2))

as.data.frame(round(apply(all_waves_PID3, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(all_waves_PID3, 2, sd, na.rm=TRUE),2))



####Predicting Number of Missing Responses (Respondent-wise) 
#Rebuilds the all-respondent (outer join) data set, counts how many of the nine
#PID7 measures each respondent is missing, and regresses that count on one
#respondent characteristic at a time.
##Proposed final nine measures OUTER JOIN
waves67 = merge(wave6,wave7, by="MNO", all=TRUE)
waves6710 = merge(waves67,wave10, by="MNO", all=TRUE)
waves6710_11 = merge(waves6710,wave11, by="MNO", all=TRUE)
waves6710_12 = merge(waves6710_11,wave12, by="MNO", all=TRUE)
waves6710_13 = merge(waves6710_12,wave13, by.x="MNO", by.y="mno", all=TRUE)
waves6710_14 = merge(waves6710_13,wave14, by.x="MNO", by.y="mno", all=TRUE)
waves6710_15 = merge(waves6710_14,wave15, by.x="MNO", by.y="mno", all=TRUE)

##Filtering by columns
all_waves_PID7 = waves6710_15[, c('PID7_w6','PID7_w7','PID7_w10_pre','PID7_w11_pre','PID7_w12','PID7_w13','PID7_w14_pre','PID7_w14','PID7_w15')]
all_waves_PID7$PID7_w10_pre = as.character(all_waves_PID7$PID7_w10_pre)
all_waves_PID7$PID7_w11_pre = as.character(all_waves_PID7$PID7_w11_pre)
all_waves_PID7$PID7_w13 = as.character(all_waves_PID7$PID7_w13)
all_waves_PID7$PID7_w14_pre = as.character(all_waves_PID7$PID7_w14_pre)

##Converting categorical values to numeric
all_waves_PID7 = convert_to_numeric(all_waves_PID7)
all_waves_PID7 = convert_to_numeric_2(all_waves_PID7)

##Computing number of missing values
#Outcome variable: count of NA among the nine PID7 measures (0 to 9)
all_waves_PID7$number_missing = rowSums(is.na(all_waves_PID7[,1:9]))
summary(all_waves_PID7$number_missing) #Checking that the correct type of cases remain

##Adding wave 6 demographic predictors
#The ".x" suffix is added by merge() to names that occur in both inputs;
#presumably these are the wave 6 copies, as the heading says -- TODO confirm
w6_demographics = waves6710_15[, c('PPGENDER.x','PPINCIMP.x','ppreg4.x','PPETHM.x','PPAGE.x','PPEDUCAT.x','MNO')]
summary(w6_demographics)

#Rows are in the same order in both frames, so cbind() lines them up
all_waves_PID7 = cbind(all_waves_PID7,w6_demographics)
#Income bracket label -> dollar value, then expressed in thousands
all_waves_PID7$PPINCIMP.x = sapply(all_waves_PID7$PPINCIMP.x, income)
all_waves_PID7$PPINCIMP.x_thousands = all_waves_PID7$PPINCIMP.x/1000 
#Region: missing values become "Missing"; codes 3-6 are mapped to the four
#regions and any other value also becomes "Missing".
#NOTE(review): comparing against 3-6 presumably relies on ifelse() returning the
#integer codes of a factor column -- confirm the coding in the data file.
all_waves_PID7$ppreg4.x = ifelse(is.na(all_waves_PID7$ppreg4.x),'Missing',all_waves_PID7$ppreg4.x)
all_waves_PID7$ppreg4.x = as.factor(ifelse(all_waves_PID7$ppreg4.x==3,"Northeast",ifelse(all_waves_PID7$ppreg4.x==4,"Midwest",ifelse(all_waves_PID7$ppreg4.x==5,"South",ifelse(all_waves_PID7$ppreg4.x==6,"West","Missing")))))
#Northeast is the reference category in the region regression
all_waves_PID7$ppreg4.x = relevel(all_waves_PID7$ppreg4.x, ref = "Northeast")
all_waves_PID7$PPAGE.x = as.numeric(all_waves_PID7$PPAGE.x)
#Partisan intensity at wave 6: 2 = strong identifier (+/-3), 1 = not strong
#(+/-2), 0 = leaner or independent (-1, 0, 1); NA when PID7_w6 is missing
all_waves_PID7$partisan_intensity_w6 = ifelse(all_waves_PID7$PID7_w6==3|all_waves_PID7$PID7_w6==-3,2,ifelse(all_waves_PID7$PID7_w6==2|all_waves_PID7$PID7_w6==-2,1,ifelse(all_waves_PID7$PID7_w6==1|all_waves_PID7$PID7_w6==0|all_waves_PID7$PID7_w6==-1,0,NA)))
table(all_waves_PID7$partisan_intensity_w6,all_waves_PID7$PID7_w6)
#NOTE(review): `<=0` codes pure independents (PID7_w6 == 0) as 1 together with
#Democrats and Democratic leaners -- confirm this is intended for a variable
#named democrat_w6.
all_waves_PID7$democrat_w6 = ifelse(all_waves_PID7$PID7_w6<=0,1,0)
table(all_waves_PID7$democrat_w6,all_waves_PID7$PID7_w6)

##OLS regression equations
#One bivariate regression per predictor; each summary is printed
summary(lm(number_missing ~ PPGENDER.x, data = all_waves_PID7))
summary(lm(number_missing ~ PPINCIMP.x_thousands, data = all_waves_PID7))
summary(lm(number_missing ~ ppreg4.x, data = all_waves_PID7))
summary(lm(number_missing ~ PPETHM.x, data = all_waves_PID7))
summary(lm(number_missing ~ PPAGE.x, data = all_waves_PID7))
summary(lm(number_missing ~ PPEDUCAT.x, data = all_waves_PID7))
summary(lm(number_missing ~ partisan_intensity_w6, data = all_waves_PID7))
summary(lm(number_missing ~ democrat_w6, data = all_waves_PID7))

#Average number of missing PID7 measures per respondent
mean(all_waves_PID7$number_missing)



####Predicting Missingness (Question-wise)
##Creating a second dataframe for wave 14
#Wave 14 supplies two measures (PID7_w14_pre and PID7_w14), so it is stacked twice
wave14_pre <- wave14

##Creating columns
#Adds, in this order, the wave label, the three election-timing descriptors and
#the wave's seven-point measure (stored as PID) to a wave data frame
describe_wave <- function(wave_df, wave_label, year_type, season, season_binary, pid) {
  wave_df$wave <- wave_label
  wave_df$election_year_type <- year_type
  wave_df$election_season <- season
  wave_df$election_season_binary <- season_binary
  wave_df$PID <- pid
  wave_df
}

wave6 <- describe_wave(wave6, "6", "Presidential", "Pre-election: Fall", "Pre-election", wave6$PID7_w6)
wave7 <- describe_wave(wave7, "7", "Presidential", "Post-election: Fall", "Post-election", wave7$PID7_w7)
wave10 <- describe_wave(wave10, "10", "Off-cycle", "Pre-election: Summer", "Pre-election", wave10$PID7_w10_pre)
wave11 <- describe_wave(wave11, "11", "Presidential", "Pre-election: Summer", "Pre-election", wave11$PID7_w11_pre)
wave12 <- describe_wave(wave12, "12", "Presidential", "Post-election: Fall", "Post-election", wave12$PID7_w12)
wave13 <- describe_wave(wave13, "13", "Midterm", "Pre-election: Fall", "Pre-election", wave13$PID7_w13)
wave14_pre <- describe_wave(wave14_pre, "14_pre", "Off-cycle", "Pre-election: Summer", "Pre-election", wave14$PID7_w14_pre)
wave14 <- describe_wave(wave14, "14", "Presidential", "Post-election: Winter", "Post-election", wave14$PID7_w14)
wave15 <- describe_wave(wave15, "15", "Presidential", "Pre-election: Fall", "Pre-election", wave15$PID7_w15)

##Filtering by columns
##Converting PID to a binary indicator of missing/non-missing
#Keeps the five modelling columns and adds PID_missing (1 = PID is NA, 0 = observed)
truncate_wave <- function(wave_df) {
  kept <- wave_df[, c("PID", "wave", "election_year_type", "election_season", "election_season_binary")]
  kept$PID_missing <- ifelse(is.na(kept$PID), 1, 0)
  kept
}

wave6_truncated <- truncate_wave(wave6)
wave7_truncated <- truncate_wave(wave7)
wave10_truncated <- truncate_wave(wave10)
wave11_truncated <- truncate_wave(wave11)
wave12_truncated <- truncate_wave(wave12)
wave13_truncated <- truncate_wave(wave13)
wave14_pre_truncated <- truncate_wave(wave14_pre)
wave14_truncated <- truncate_wave(wave14)
wave15_truncated <- truncate_wave(wave15)

##Assembling question-wise data set
#One row per respondent per measure, stacked in wave order
all_waves_truncated <- rbind(
  wave6_truncated,
  wave7_truncated,
  wave10_truncated,
  wave11_truncated,
  wave12_truncated,
  wave13_truncated,
  wave14_pre_truncated,
  wave14_truncated,
  wave15_truncated
)

#Turn each predictor into a factor and set its reference category
all_waves_truncated$wave <- relevel(as.factor(all_waves_truncated$wave), ref = "6")
all_waves_truncated$election_season_binary <- relevel(as.factor(all_waves_truncated$election_season_binary), ref = "Pre-election")
all_waves_truncated$election_season <- relevel(as.factor(all_waves_truncated$election_season), ref = "Pre-election: Fall")
all_waves_truncated$election_year_type <- relevel(as.factor(all_waves_truncated$election_year_type), ref = "Presidential")

##Logistic regression equations
#One model per predictor; each summary is printed
summary(glm(PID_missing ~ wave, family = binomial, data = all_waves_truncated))
summary(glm(PID_missing ~ election_season_binary, family = binomial, data = all_waves_truncated))
summary(glm(PID_missing ~ election_season, family = binomial, data = all_waves_truncated))
summary(glm(PID_missing ~ election_year_type, family = binomial, data = all_waves_truncated))

#Overall share of missing PID values
mean(all_waves_truncated$PID_missing)



####Time Series and IV Regressions
###Creating data set [COMPLETE CASES ONLY]
##Proposed final nine measures INNER JOIN
# Inner-join the waves on the respondent ID; each merge keeps only respondents
# present in both inputs. Waves 13-15 are joined on "mno" instead of "MNO".
waves67 <- merge(wave6, wave7, by = "MNO")
waves6710 <- merge(waves67, wave10, by = "MNO")
waves6710_11 <- merge(waves6710, wave11, by = "MNO")
waves6710_12 <- merge(waves6710_11, wave12, by = "MNO")
waves6710_13 <- merge(waves6710_12, wave13, by.x = "MNO", by.y = "mno")
waves6710_14 <- merge(waves6710_13, wave14, by.x = "MNO", by.y = "mno")
waves6710_15 <- merge(waves6710_14, wave15, by.x = "MNO", by.y = "mno")

##Filtering by rows
# All row filters are combined into a single logical mask. The work happens
# inside local() so the helper objects do not leak into the workspace.
waves6710_15 <- local({
  panel <- waves6710_15
  measures <- c("w6", "w7", "w10_pre", "w11_pre", "w12",
                "w13", "w14_pre", "w14", "w15")
  pid3_valid <- c("Democrat", "Independent", "Republican")
  pid7_valid <- c("Strong Democrat", "Not Strong Democrat", "Leans Democrat",
                  "Undecided/Independent/Other", "Leans Republican",
                  "Not Strong Republican", "Strong Republican")

  # TRUE for rows whose answer is one of `valid` in every column of `cols`
  # (%in% never yields NA, so missing answers count as invalid).
  valid_everywhere <- function(cols, valid) {
    Reduce(`&`, lapply(cols, function(col) panel[[col]] %in% valid))
  }

  # TRUE when the date lies in [first, last]; NA when the date is missing.
  in_window <- function(dates, first, last) {
    dates >= first & dates <= last
  }

  #Removing if PID3 value is not applicable
  keep_pid3 <- valid_everywhere(paste0("PID3_", measures), pid3_valid)

  #Removing if PID7 value is not applicable
  keep_pid7 <- valid_everywhere(paste0("PID7_", measures), pid7_valid)

  #Removing if pre-survey partisanship measure is not within the defined date range
  #The cut points could be extended to include more subjects
  keep_dates <- in_window(panel$pppadate_w10, 20150728, 20150903) &
    in_window(panel$pppadate_w11, 20160715, 20160827) &
    in_window(panel$pppadate_w14, 20190611, 20190820)

  # which() drops rows whose mask is NA (missing interview date). Plain
  # logical indexing would instead return one all-NA row for each of them.
  panel[which(keep_pid3 & keep_pid7 & keep_dates), ]
})

##Creating multiple dataframes
# Builds a three-column frame from the given measures of one PID scale
# ("PID7" or "PID3") and relabels the columns, in order, as
# PID_t_0, PID_t_1 and PID_t_2.
pid_window <- function(pid_scale, measures) {
  setNames(
    waves6710_15[, paste0(pid_scale, "_", measures)],
    c("PID_t_0", "PID_t_1", "PID_t_2")
  )
}

# Seven overlapping windows of three consecutive measures, PID7 scale
complete_cases_w6710_PID7 <- pid_window("PID7", c("w6", "w7", "w10_pre"))
complete_cases_w71011_PID7 <- pid_window("PID7", c("w7", "w10_pre", "w11_pre"))
complete_cases_w101112_PID7 <- pid_window("PID7", c("w10_pre", "w11_pre", "w12"))
complete_cases_w111213_PID7 <- pid_window("PID7", c("w11_pre", "w12", "w13"))
complete_cases_w121314_PID7 <- pid_window("PID7", c("w12", "w13", "w14_pre"))
complete_cases_w131414_PID7 <- pid_window("PID7", c("w13", "w14_pre", "w14"))
complete_cases_w141415_PID7 <- pid_window("PID7", c("w14_pre", "w14", "w15"))

# The same seven windows, PID3 scale
complete_cases_w6710_PID3 <- pid_window("PID3", c("w6", "w7", "w10_pre"))
complete_cases_w71011_PID3 <- pid_window("PID3", c("w7", "w10_pre", "w11_pre"))
complete_cases_w101112_PID3 <- pid_window("PID3", c("w10_pre", "w11_pre", "w12"))
complete_cases_w111213_PID3 <- pid_window("PID3", c("w11_pre", "w12", "w13"))
complete_cases_w121314_PID3 <- pid_window("PID3", c("w12", "w13", "w14_pre"))
complete_cases_w131414_PID3 <- pid_window("PID3", c("w13", "w14_pre", "w14"))
complete_cases_w141415_PID3 <- pid_window("PID3", c("w14_pre", "w14", "w15"))

##Creating combined dataframes
# Returns `frame` with its three lag columns converted to character.
lags_as_character <- function(frame) {
  for (lag_col in c("PID_t_0", "PID_t_1", "PID_t_2")) {
    frame[[lag_col]] <- as.character(frame[[lag_col]])
  }
  frame
}

# PID7: pool all seven windows, then convert the pooled frame and the three
# windows that are analysed on their own.
complete_cases_rbind_PID7 <- lags_as_character(rbind(
  complete_cases_w6710_PID7, complete_cases_w71011_PID7,
  complete_cases_w101112_PID7, complete_cases_w111213_PID7,
  complete_cases_w121314_PID7, complete_cases_w131414_PID7,
  complete_cases_w141415_PID7
))
complete_cases_w6710_PID7 <- lags_as_character(complete_cases_w6710_PID7)
complete_cases_w111213_PID7 <- lags_as_character(complete_cases_w111213_PID7)
complete_cases_w141415_PID7 <- lags_as_character(complete_cases_w141415_PID7)

# PID3: same steps
complete_cases_rbind_PID3 <- lags_as_character(rbind(
  complete_cases_w6710_PID3, complete_cases_w71011_PID3,
  complete_cases_w101112_PID3, complete_cases_w111213_PID3,
  complete_cases_w121314_PID3, complete_cases_w131414_PID3,
  complete_cases_w141415_PID3
))
complete_cases_w6710_PID3 <- lags_as_character(complete_cases_w6710_PID3)
complete_cases_w111213_PID3 <- lags_as_character(complete_cases_w111213_PID3)
complete_cases_w141415_PID3 <- lags_as_character(complete_cases_w141415_PID3)

##Converting categorical values to numeric
# Every frame goes through convert_to_numeric() and then
# convert_to_numeric_2(); both helpers are defined outside this section.
complete_cases_rbind_PID7 <- convert_to_numeric_2(
  convert_to_numeric(complete_cases_rbind_PID7)
)
complete_cases_w6710_PID7 <- convert_to_numeric_2(
  convert_to_numeric(complete_cases_w6710_PID7)
)
complete_cases_w111213_PID7 <- convert_to_numeric_2(
  convert_to_numeric(complete_cases_w111213_PID7)
)
complete_cases_w141415_PID7 <- convert_to_numeric_2(
  convert_to_numeric(complete_cases_w141415_PID7)
)

complete_cases_rbind_PID3 <- convert_to_numeric_2(
  convert_to_numeric(complete_cases_rbind_PID3)
)
complete_cases_w6710_PID3 <- convert_to_numeric_2(
  convert_to_numeric(complete_cases_w6710_PID3)
)
complete_cases_w111213_PID3 <- convert_to_numeric_2(
  convert_to_numeric(complete_cases_w111213_PID3)
)
complete_cases_w141415_PID3 <- convert_to_numeric_2(
  convert_to_numeric(complete_cases_w141415_PID3)
)

##Adding a wave column
# One single-column frame per window: every row carries the label of the
# window's first measure, with as many rows as that window's PID7 frame.
w6 <- setNames(
  as.data.frame(rep("6", nrow(complete_cases_w6710_PID7))), "wave"
)
w7 <- setNames(
  as.data.frame(rep("7", nrow(complete_cases_w71011_PID7))), "wave"
)
w10_pre <- setNames(
  as.data.frame(rep("10_pre", nrow(complete_cases_w101112_PID7))), "wave"
)
w11_pre <- setNames(
  as.data.frame(rep("11_pre", nrow(complete_cases_w111213_PID7))), "wave"
)
w12 <- setNames(
  as.data.frame(rep("12", nrow(complete_cases_w121314_PID7))), "wave"
)
w13 <- setNames(
  as.data.frame(rep("13", nrow(complete_cases_w131414_PID7))), "wave"
)
w14_pre <- setNames(
  as.data.frame(rep("14_pre", nrow(complete_cases_w141415_PID7))), "wave"
)

# The labels are stacked in the same window order used to build the pooled
# frames, then bound column-wise; this relies on matching row order.
complete_cases_rbind_PID7_waves <- rbind(w6, w7, w10_pre, w11_pre, w12, w13, w14_pre)
complete_cases_rbind_PID7 <- cbind(
  complete_cases_rbind_PID7, complete_cases_rbind_PID7_waves
)
summary(complete_cases_rbind_PID7)

complete_cases_rbind_PID3_waves <- rbind(w6, w7, w10_pre, w11_pre, w12, w13, w14_pre)
complete_cases_rbind_PID3 <- cbind(
  complete_cases_rbind_PID3, complete_cases_rbind_PID3_waves
)
summary(complete_cases_rbind_PID3)

##Time Series Regressions
# Each pair regresses PID_t_2 on PID_t_1, then on PID_t_1 and PID_t_0.

# PID7, pooled windows
summary(lm(PID_t_2 ~ PID_t_1, data = complete_cases_rbind_PID7))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data = complete_cases_rbind_PID7))
# summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0 + wave, data = complete_cases_rbind_PID7))

# PID7, window w6 / w7 / w10_pre
summary(lm(PID_t_2 ~ PID_t_1, data = complete_cases_w6710_PID7))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data = complete_cases_w6710_PID7))

# PID7, window w11_pre / w12 / w13
summary(lm(PID_t_2 ~ PID_t_1, data = complete_cases_w111213_PID7))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data = complete_cases_w111213_PID7))

# PID7, window w14_pre / w14 / w15
summary(lm(PID_t_2 ~ PID_t_1, data = complete_cases_w141415_PID7))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data = complete_cases_w141415_PID7))

# PID3, pooled windows
summary(lm(PID_t_2 ~ PID_t_1, data = complete_cases_rbind_PID3))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data = complete_cases_rbind_PID3))
# summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0 + wave, data = complete_cases_rbind_PID3))

# PID3, window w6 / w7 / w10_pre
summary(lm(PID_t_2 ~ PID_t_1, data = complete_cases_w6710_PID3))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data = complete_cases_w6710_PID3))

# PID3, window w11_pre / w12 / w13
summary(lm(PID_t_2 ~ PID_t_1, data = complete_cases_w111213_PID3))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data = complete_cases_w111213_PID3))

# PID3, window w14_pre / w14 / w15
summary(lm(PID_t_2 ~ PID_t_1, data = complete_cases_w141415_PID3))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data = complete_cases_w141415_PID3))

##IV Regressions
library(ivreg)

# Same outcome and regressor as the OLS models; PID_t_0 is supplied through
# the second (instruments) formula instead of entering as a regressor.

# PID7: pooled, then the three stand-alone windows
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data = complete_cases_rbind_PID7))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data = complete_cases_w6710_PID7))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data = complete_cases_w111213_PID7))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data = complete_cases_w141415_PID7))

# PID3: pooled, then the three stand-alone windows
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data = complete_cases_rbind_PID3))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data = complete_cases_w6710_PID3))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data = complete_cases_w111213_PID3))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data = complete_cases_w141415_PID3))



####Replicating Ansolabehere, Rodden, and Snyder (2008) by taking simple averages
###Creating data set [COMPLETE CASES ONLY]
##Proposed final nine measures INNER JOIN
# Inner-join the waves on the respondent ID; each merge keeps only respondents
# present in both inputs. Waves 13-15 are joined on "mno" instead of "MNO".
waves67 <- merge(wave6, wave7, by = "MNO")
waves6710 <- merge(waves67, wave10, by = "MNO")
waves6710_11 <- merge(waves6710, wave11, by = "MNO")
waves6710_12 <- merge(waves6710_11, wave12, by = "MNO")
waves6710_13 <- merge(waves6710_12, wave13, by.x = "MNO", by.y = "mno")
waves6710_14 <- merge(waves6710_13, wave14, by.x = "MNO", by.y = "mno")
waves6710_15 <- merge(waves6710_14, wave15, by.x = "MNO", by.y = "mno")

##Filtering by rows
# All row filters are combined into a single logical mask. The work happens
# inside local() so the helper objects do not leak into the workspace.
waves6710_15 <- local({
  panel <- waves6710_15
  measures <- c("w6", "w7", "w10_pre", "w11_pre", "w12",
                "w13", "w14_pre", "w14", "w15")
  pid3_valid <- c("Democrat", "Independent", "Republican")
  pid7_valid <- c("Strong Democrat", "Not Strong Democrat", "Leans Democrat",
                  "Undecided/Independent/Other", "Leans Republican",
                  "Not Strong Republican", "Strong Republican")

  # TRUE for rows whose answer is one of `valid` in every column of `cols`
  # (%in% never yields NA, so missing answers count as invalid).
  valid_everywhere <- function(cols, valid) {
    Reduce(`&`, lapply(cols, function(col) panel[[col]] %in% valid))
  }

  # TRUE when the date lies in [first, last]; NA when the date is missing.
  in_window <- function(dates, first, last) {
    dates >= first & dates <= last
  }

  #Removing if PID3 value is not applicable
  keep_pid3 <- valid_everywhere(paste0("PID3_", measures), pid3_valid)

  #Removing if PID7 value is not applicable
  keep_pid7 <- valid_everywhere(paste0("PID7_", measures), pid7_valid)

  #Removing if pre-survey partisanship measure is not within the defined date range
  #The cut points could be extended to include more subjects
  keep_dates <- in_window(panel$pppadate_w10, 20150728, 20150903) &
    in_window(panel$pppadate_w11, 20160715, 20160827) &
    in_window(panel$pppadate_w14, 20190611, 20190820)

  # which() drops rows whose mask is NA (missing interview date). Plain
  # logical indexing would instead return one all-NA row for each of them.
  panel[which(keep_pid3 & keep_pid7 & keep_dates), ]
})

##Filtering by columns
# Returns `frame` with the listed columns converted to character.
columns_to_character <- function(frame, cols) {
  frame[cols] <- lapply(frame[cols], as.character)
  frame
}

# Keep the nine PID7 measures; only the four listed columns are converted to
# character before the numeric recode.
all_waves_PID7_triplets <- columns_to_character(
  waves6710_15[, paste0("PID7_", c("w6", "w7", "w10_pre", "w11_pre", "w12",
                                   "w13", "w14_pre", "w14", "w15"))],
  c("PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")
)

# Keep the nine PID3 measures; only the six listed columns are converted.
all_waves_PID3_triplets <- columns_to_character(
  waves6710_15[, paste0("PID3_", c("w6", "w7", "w10_pre", "w11_pre", "w12",
                                   "w13", "w14_pre", "w14", "w15"))],
  c("PID3_w6", "PID3_w7", "PID3_w12", "PID3_w13", "PID3_w14", "PID3_w15")
)

##Converting categorical values to numeric
# convert_to_numeric() and convert_to_numeric_2() are defined outside this
# section; they are applied in that order to each frame.
all_waves_PID7_triplets <- convert_to_numeric_2(
  convert_to_numeric(all_waves_PID7_triplets)
)

all_waves_PID3_triplets <- convert_to_numeric_2(
  convert_to_numeric(all_waves_PID3_triplets)
)

##Creating dataframes
# Row-wise means over three blocks of three adjacent columns (1-3, 4-6, 7-9)
# are appended as columns 10-12, which are then split off on their own.
all_waves_PID7_triplets$PID7_w6_w7_w10_pre <- apply(
  all_waves_PID7_triplets[, 1:3], MARGIN = 1, FUN = mean, na.rm = TRUE
)
all_waves_PID7_triplets$PID7_w11_pre_w12_w13 <- apply(
  all_waves_PID7_triplets[, 4:6], MARGIN = 1, FUN = mean, na.rm = TRUE
)
all_waves_PID7_triplets$PID7_w14_pre_w14_w15 <- apply(
  all_waves_PID7_triplets[, 7:9], MARGIN = 1, FUN = mean, na.rm = TRUE
)
PID7_triplets <- all_waves_PID7_triplets[, 10:12]

# Single-measure comparison set: waves 7, 12 and 14 only
all_waves_PID7_71214 <- all_waves_PID7_triplets[, c("PID7_w7", "PID7_w12", "PID7_w14")]

all_waves_PID3_triplets$PID3_w6_w7_w10_pre <- apply(
  all_waves_PID3_triplets[, 1:3], MARGIN = 1, FUN = mean, na.rm = TRUE
)
all_waves_PID3_triplets$PID3_w11_pre_w12_w13 <- apply(
  all_waves_PID3_triplets[, 4:6], MARGIN = 1, FUN = mean, na.rm = TRUE
)
all_waves_PID3_triplets$PID3_w14_pre_w14_w15 <- apply(
  all_waves_PID3_triplets[, 7:9], MARGIN = 1, FUN = mean, na.rm = TRUE
)
PID3_triplets <- all_waves_PID3_triplets[, 10:12]

all_waves_PID3_71214 <- all_waves_PID3_triplets[, c("PID3_w7", "PID3_w12", "PID3_w14")]

##Computing statistics
# Prepares a correlation matrix for export: the lower triangle and the
# diagonal are replaced by empty strings (which makes the matrix character),
# leaving only the upper triangle, and the result is returned as a data frame.
blank_lower_triangle <- function(cor_matrix) {
  shown <- cor_matrix
  shown[lower.tri(cor_matrix, diag = TRUE)] <- ""
  as.data.frame(shown)
}

#Correlations
# Averaged triplets, rounded to three decimals
cor_PID7 <- round(cor(PID7_triplets, use = "pairwise.complete.obs"), 3)
cor_PID3 <- round(cor(PID3_triplets, use = "pairwise.complete.obs"), 3)

# Note: despite their names, lower_7 / lower_3 hold the UPPER triangle.
lower_7 <- blank_lower_triangle(cor_PID7)
lower_7
write.csv(lower_7, "./total_cases_PID7_triplets_correlations.csv")

lower_3 <- blank_lower_triangle(cor_PID3)
lower_3
write.csv(lower_3, "./total_cases_PID3_triplets_correlations.csv")

# Single measures from waves 7, 12 and 14 (overwrites cor_PID7 / cor_PID3)
cor_PID7 <- round(cor(all_waves_PID7_71214, use = "pairwise.complete.obs"), 3)
cor_PID3 <- round(cor(all_waves_PID3_71214, use = "pairwise.complete.obs"), 3)

lower_7 <- blank_lower_triangle(cor_PID7)
lower_7
write.csv(lower_7, "./total_cases_PID7_71214_correlations.csv")

lower_3 <- blank_lower_triangle(cor_PID3)
lower_3
write.csv(lower_3, "./total_cases_PID3_71214_correlations.csv")

#Time Series Regressions
# Averaged triplets: latest average on the middle one, then on the middle
# and earliest ones.
summary(lm(PID7_w14_pre_w14_w15 ~ PID7_w11_pre_w12_w13,
           data = PID7_triplets))
summary(lm(PID7_w14_pre_w14_w15 ~ PID7_w11_pre_w12_w13 + PID7_w6_w7_w10_pre,
           data = PID7_triplets))

summary(lm(PID3_w14_pre_w14_w15 ~ PID3_w11_pre_w12_w13,
           data = PID3_triplets))
summary(lm(PID3_w14_pre_w14_w15 ~ PID3_w11_pre_w12_w13 + PID3_w6_w7_w10_pre,
           data = PID3_triplets))

# Single measures: wave 14 on wave 12, then on waves 12 and 7
summary(lm(PID7_w14 ~ PID7_w12, data = all_waves_PID7_71214))
summary(lm(PID7_w14 ~ PID7_w12 + PID7_w7, data = all_waves_PID7_71214))

summary(lm(PID3_w14 ~ PID3_w12, data = all_waves_PID3_71214))
summary(lm(PID3_w14 ~ PID3_w12 + PID3_w7, data = all_waves_PID3_71214))

#IV Regressions
library(ivreg)

# Averaged triplets: the earliest average is supplied through the second
# (instruments) formula.
summary(ivreg(PID7_w14_pre_w14_w15 ~ PID7_w11_pre_w12_w13,
              ~ PID7_w6_w7_w10_pre,
              data = PID7_triplets))

summary(ivreg(PID3_w14_pre_w14_w15 ~ PID3_w11_pre_w12_w13,
              ~ PID3_w6_w7_w10_pre,
              data = PID3_triplets))

# Single measures: wave 7 is supplied through the instruments formula.
summary(ivreg(PID7_w14 ~ PID7_w12, ~ PID7_w7, data = all_waves_PID7_71214))

summary(ivreg(PID3_w14 ~ PID3_w12, ~ PID3_w7, data = all_waves_PID3_71214))



####Predicting Individual-level Standard Deviations [COMPLETE CASES ONLY]
##Proposed final nine measures INNER JOIN
# Inner-join the waves on the respondent ID; each merge keeps only respondents
# present in both inputs. Waves 13-15 are joined on "mno" instead of "MNO".
waves67 <- merge(wave6, wave7, by = "MNO")
waves6710 <- merge(waves67, wave10, by = "MNO")
waves6710_11 <- merge(waves6710, wave11, by = "MNO")
waves6710_12 <- merge(waves6710_11, wave12, by = "MNO")
waves6710_13 <- merge(waves6710_12, wave13, by.x = "MNO", by.y = "mno")
waves6710_14 <- merge(waves6710_13, wave14, by.x = "MNO", by.y = "mno")
waves6710_15 <- merge(waves6710_14, wave15, by.x = "MNO", by.y = "mno")

##Filtering by rows
# All row filters are combined into a single logical mask. The work happens
# inside local() so the helper objects do not leak into the workspace.
waves6710_15 <- local({
  panel <- waves6710_15
  measures <- c("w6", "w7", "w10_pre", "w11_pre", "w12",
                "w13", "w14_pre", "w14", "w15")
  pid3_valid <- c("Democrat", "Independent", "Republican")
  pid7_valid <- c("Strong Democrat", "Not Strong Democrat", "Leans Democrat",
                  "Undecided/Independent/Other", "Leans Republican",
                  "Not Strong Republican", "Strong Republican")

  # TRUE for rows whose answer is one of `valid` in every column of `cols`
  # (%in% never yields NA, so missing answers count as invalid).
  valid_everywhere <- function(cols, valid) {
    Reduce(`&`, lapply(cols, function(col) panel[[col]] %in% valid))
  }

  # TRUE when the date lies in [first, last]; NA when the date is missing.
  in_window <- function(dates, first, last) {
    dates >= first & dates <= last
  }

  #Removing if PID3 value is not applicable
  keep_pid3 <- valid_everywhere(paste0("PID3_", measures), pid3_valid)

  #Removing if PID7 value is not applicable
  keep_pid7 <- valid_everywhere(paste0("PID7_", measures), pid7_valid)

  #Removing if pre-survey partisanship measure is not within the defined date range
  #The cut points could be extended to include more subjects
  keep_dates <- in_window(panel$pppadate_w10, 20150728, 20150903) &
    in_window(panel$pppadate_w11, 20160715, 20160827) &
    in_window(panel$pppadate_w14, 20190611, 20190820)

  # which() drops rows whose mask is NA (missing interview date). Plain
  # logical indexing would instead return one all-NA row for each of them,
  # which would turn the unconditional mean() calls further down into NA.
  panel[which(keep_pid3 & keep_pid7 & keep_dates), ]
})

##Filtering by columns
# Returns `frame` with the listed columns converted to character.
columns_to_character <- function(frame, cols) {
  frame[cols] <- lapply(frame[cols], as.character)
  frame
}

# Keep the nine PID7 measures; only the four listed columns are converted to
# character before the numeric recode.
all_waves_PID7 <- columns_to_character(
  waves6710_15[, paste0("PID7_", c("w6", "w7", "w10_pre", "w11_pre", "w12",
                                   "w13", "w14_pre", "w14", "w15"))],
  c("PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")
)

# Keep the nine PID3 measures; only the six listed columns are converted.
all_waves_PID3 <- columns_to_character(
  waves6710_15[, paste0("PID3_", c("w6", "w7", "w10_pre", "w11_pre", "w12",
                                   "w13", "w14_pre", "w14", "w15"))],
  c("PID3_w6", "PID3_w7", "PID3_w12", "PID3_w13", "PID3_w14", "PID3_w15")
)

##Converting categorical values to numeric
# First pass over both frames, then second pass over both frames; the two
# converters are defined outside this section.
all_waves_PID7 <- convert_to_numeric(all_waves_PID7)
all_waves_PID3 <- convert_to_numeric(all_waves_PID3)

all_waves_PID7 <- convert_to_numeric_2(all_waves_PID7)
all_waves_PID3 <- convert_to_numeric_2(all_waves_PID3)

##Adding Means and SDs (Individuals)
# Row-wise mean and standard deviation across the first nine columns (the
# nine PID measures); no na.rm, so a row with any NA yields NA.
all_waves_PID7$Mean_PID7 <- apply(all_waves_PID7[, 1:9], MARGIN = 1, FUN = mean)
all_waves_PID7$SD_PID7 <- apply(all_waves_PID7[, 1:9], MARGIN = 1, FUN = sd)

all_waves_PID3$Mean_PID3 <- apply(all_waves_PID3[, 1:9], MARGIN = 1, FUN = mean)
all_waves_PID3$SD_PID3 <- apply(all_waves_PID3[, 1:9], MARGIN = 1, FUN = sd)

##Adding wave 6 demographic predictors
# The ".x" columns are the copies left by the first merge (wave 6 side).
w6_demographics <- waves6710_15[, c(
  "PPGENDER.x", "PPINCIMP.x", "ppreg4.x", "PPETHM.x",
  "PPAGE.x", "PPEDUCAT.x", "MNO"
)]
summary(w6_demographics)

# Column-bind the PID7 frame, the PID3 frame and the demographics; this
# relies on all three sharing the row order of waves6710_15.
all_waves <- cbind(all_waves_PID7, all_waves_PID3, w6_demographics)

# income() is defined outside this section; its result is divided by 1000.
all_waves$PPINCIMP.x <- sapply(all_waves$PPINCIMP.x, income)
all_waves$PPINCIMP.x_thousands <- all_waves$PPINCIMP.x / 1000

# Region: missing values become "Missing", then the values 3/4/5/6 are
# mapped to region names and anything else to "Missing".
# NOTE(review): presumably 3-6 are the underlying factor codes of ppreg4 -
# confirm against the data's level order.
all_waves$ppreg4.x <- ifelse(
  is.na(all_waves$ppreg4.x), "Missing", all_waves$ppreg4.x
)
all_waves$ppreg4.x <- relevel(
  as.factor(
    ifelse(
      all_waves$ppreg4.x == 3, "Northeast",
      ifelse(
        all_waves$ppreg4.x == 4, "Midwest",
        ifelse(
          all_waves$ppreg4.x == 5, "South",
          ifelse(all_waves$ppreg4.x == 6, "West", "Missing")
        )
      )
    )
  ),
  ref = "Northeast"
)

all_waves$PPAGE.x <- as.numeric(all_waves$PPAGE.x)

# Partisan intensity from the wave 6 PID7 score: +/-3 -> 2, +/-2 -> 1,
# -1/0/1 -> 0, anything else (including NA) -> NA.
all_waves$partisan_intensity_w6 <- ifelse(
  all_waves$PID7_w6 %in% c(3, -3), 2,
  ifelse(
    all_waves$PID7_w6 %in% c(2, -2), 1,
    ifelse(all_waves$PID7_w6 %in% c(1, 0, -1), 0, NA)
  )
)
table(all_waves$partisan_intensity_w6, all_waves$PID7_w6)

# 1 when the wave 6 PID7 score is <= 0, else 0.
# NOTE(review): the cutoff includes score 0, so those respondents are coded
# 1 together with the negative scores - confirm this is intended.
all_waves$democrat_w6 <- ifelse(all_waves$PID7_w6 <= 0, 1, 0)
table(all_waves$democrat_w6, all_waves$PID7_w6)

##OLS regression equations
# One bivariate regression of the individual-level PID7 standard deviation
# per predictor; the trailing comments record the authors' findings.
summary(lm(SD_PID7 ~ PPGENDER.x,
           data = all_waves)) #Gender is not associated with standard deviation
summary(lm(SD_PID7 ~ PPINCIMP.x_thousands,
           data = all_waves)) #Income is not associated with standard deviation
summary(lm(SD_PID7 ~ ppreg4.x,
           data = all_waves)) #Region is not associated with standard deviation
summary(lm(SD_PID7 ~ PPETHM.x,
           data = all_waves)) #Ethnicity is not associated with standard deviation
summary(lm(SD_PID7 ~ PPAGE.x,
           data = all_waves)) #Age is marginally negatively associated with standard deviation
summary(lm(SD_PID7 ~ PPEDUCAT.x,
           data = all_waves)) #Education is not associated with standard deviation
summary(lm(SD_PID7 ~ partisan_intensity_w6,
           data = all_waves)) #Partisan intensity is strongly negatively associated with standard deviation
summary(lm(SD_PID7 ~ democrat_w6,
           data = all_waves)) #Partisanship is not associated with standard deviation

# Distribution of the individual-level standard deviations
summary(all_waves$SD_PID7)
summary(all_waves$SD_PID3)
hist(all_waves$SD_PID7)
hist(all_waves$SD_PID3)

# Sample averages of the standard deviations
mean(all_waves$SD_PID7)
mean(all_waves$SD_PID3)



####Estimating Drift In Aggregate
##Creating a second dataframe for wave 14
# Wave 14 carries two measures (pre and post), so a copy is taken before any
# column is added; the copy will hold the "pre" measure.
wave14_pre <- wave14

##Renaming columns / Filtering by columns
# For each measure: expose its PID7 and PID3 answers under the common names
# PID_7 and PID_3 (added to the wave frame itself), then keep just those two
# columns in a *_truncated frame.
wave6$PID_7 <- wave6$PID7_w6
wave6$PID_3 <- wave6$PID3_w6
wave6_truncated <- wave6[, c("PID_7", "PID_3")]

wave7$PID_7 <- wave7$PID7_w7
wave7$PID_3 <- wave7$PID3_w7
wave7_truncated <- wave7[, c("PID_7", "PID_3")]

wave10$PID_7 <- wave10$PID7_w10_pre
wave10$PID_3 <- wave10$PID3_w10_pre
wave10_truncated <- wave10[, c("PID_7", "PID_3")]

wave11$PID_7 <- wave11$PID7_w11_pre
wave11$PID_3 <- wave11$PID3_w11_pre
wave11_truncated <- wave11[, c("PID_7", "PID_3")]

wave12$PID_7 <- wave12$PID7_w12
wave12$PID_3 <- wave12$PID3_w12
wave12_truncated <- wave12[, c("PID_7", "PID_3")]

wave13$PID_7 <- wave13$PID7_w13
wave13$PID_3 <- wave13$PID3_w13
wave13_truncated <- wave13[, c("PID_7", "PID_3")]

# The "pre" values are read from wave14, of which wave14_pre is a copy.
wave14_pre$PID_7 <- wave14$PID7_w14_pre
wave14_pre$PID_3 <- wave14$PID3_w14_pre
wave14_pre_truncated <- wave14_pre[, c("PID_7", "PID_3")]

wave14$PID_7 <- wave14$PID7_w14
wave14$PID_3 <- wave14$PID3_w14
wave14_truncated <- wave14[, c("PID_7", "PID_3")]

wave15$PID_7 <- wave15$PID7_w15
wave15$PID_3 <- wave15$PID3_w15
wave15_truncated <- wave15[, c("PID_7", "PID_3")]

##Assembling question-wise data set
# Stack the nine two-column frames row-wise; only PID_3 is converted to
# character before the numeric recode.
all_waves_truncated <- rbind(
  wave6_truncated, wave7_truncated, wave10_truncated,
  wave11_truncated, wave12_truncated, wave13_truncated,
  wave14_pre_truncated, wave14_truncated, wave15_truncated
)
all_waves_truncated[["PID_3"]] <- as.character(all_waves_truncated[["PID_3"]])

##Converting categorical values to numeric
# convert_to_numeric() then convert_to_numeric_2(), both defined outside
# this section.
all_waves_truncated <- convert_to_numeric_2(
  convert_to_numeric(all_waves_truncated)
)

##Adding wave columns
# Builds a two-column frame with one row per row of `wave_frame`, holding the
# constant ordinal position of the measure (wave_counter) and the constant
# elapsed time assigned to it (years_since_origin).
wave_clock <- function(wave_frame, counter, years) {
  n_rows <- nrow(wave_frame)
  setNames(
    as.data.frame(cbind(rep(counter, n_rows), rep(years, n_rows))),
    c("wave_counter", "years_since_origin")
  )
}

w6 <- wave_clock(wave6, 0, 0)
w7 <- wave_clock(wave7, 1, 0.167)
w10_pre <- wave_clock(wave10, 2, 2.833)
w11_pre <- wave_clock(wave11, 3, 3.833)
w12 <- wave_clock(wave12, 4, 4.083)
w13 <- wave_clock(wave13, 5, 6)
w14_pre <- wave_clock(wave14_pre, 6, 6.75)
w14 <- wave_clock(wave14, 7, 7.25)
w15 <- wave_clock(wave15, 8, 8)

# Stacked in the same order as the rows of all_waves_truncated, then bound
# column-wise; this relies on matching row order.
waves <- rbind(w6, w7, w10_pre, w11_pre, w12, w13, w14_pre, w14, w15)
all_waves_truncated <- cbind(all_waves_truncated, waves)

##OLS Regressions
# Linear trend in each PID scale, first per measure step ...
summary(lm(PID_7 ~ wave_counter, data = all_waves_truncated))
summary(lm(PID_3 ~ wave_counter, data = all_waves_truncated))

# ... then per unit of years_since_origin
summary(lm(PID_7 ~ years_since_origin, data = all_waves_truncated))
summary(lm(PID_3 ~ years_since_origin, data = all_waves_truncated))



####Estimating Drift Respondent-Wise [COMPLETE CASES ONLY]
##Proposed final nine measures INNER JOIN
# Inner-join the waves on the respondent ID; each merge keeps only respondents
# present in both inputs. Waves 13-15 are joined on "mno" instead of "MNO".
waves67 <- merge(wave6, wave7, by = "MNO")
waves6710 <- merge(waves67, wave10, by = "MNO")
waves6710_11 <- merge(waves6710, wave11, by = "MNO")
waves6710_12 <- merge(waves6710_11, wave12, by = "MNO")
waves6710_13 <- merge(waves6710_12, wave13, by.x = "MNO", by.y = "mno")
waves6710_14 <- merge(waves6710_13, wave14, by.x = "MNO", by.y = "mno")
waves6710_15 <- merge(waves6710_14, wave15, by.x = "MNO", by.y = "mno")

##Filtering by rows
#Removing if PID3 value is not applicable
# A row is kept only when all nine PID3 measures hold one of the three valid
# answers. The mask is built inside local() so no helper objects are left in
# the workspace; %in% never yields NA, so the mask is strictly TRUE/FALSE.
waves6710_15 <- local({
  pid3_cols <- paste0("PID3_", c("w6", "w7", "w12", "w14", "w15",
                                 "w10_pre", "w11_pre", "w13", "w14_pre"))
  pid3_valid <- c("Democrat", "Independent", "Republican")
  keep_pid3 <- Reduce(
    `&`,
    lapply(pid3_cols, function(col) waves6710_15[[col]] %in% pid3_valid)
  )
  waves6710_15[keep_pid3, ]
})

#Removing if PID7 value is not applicable
waves6710_15 = waves6710_15[waves6710_15$PID7_w6 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w7 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w12 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w14 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w15 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]

waves6710_15 = waves6710_15[waves6710_15$PID7_w10_pre %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w11_pre %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w13 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w14_pre %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]

#Removing if pre-survey partisanship measure is not within the defined date range
waves6710_15 = waves6710_15[waves6710_15$pppadate_w10>=20150728 & waves6710_15$pppadate_w10<=20150903,] #The cut points could be extended to include more subjects
waves6710_15 = waves6710_15[waves6710_15$pppadate_w11>=20160715 & waves6710_15$pppadate_w11<=20160827,] #The cut points could be extended to include more subjects
waves6710_15 = waves6710_15[waves6710_15$pppadate_w14>=20190611 & waves6710_15$pppadate_w14<=20190820,] #The cut points could be extended to include more subjects

##Filtering by columns
#Keep only the nine PID7 measures (in wave order) and coerce the listed columns to character
#before the numeric conversion below.
all_waves_PID7 <- waves6710_15[, c(
  "PID7_w6", "PID7_w7", "PID7_w10_pre", "PID7_w11_pre", "PID7_w12",
  "PID7_w13", "PID7_w14_pre", "PID7_w14", "PID7_w15"
)]
all_waves_PID7[c("PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")] <-
  lapply(
    all_waves_PID7[c("PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")],
    as.character
  )

#Same for the nine PID3 measures; note that a different subset of columns is coerced here.
all_waves_PID3 <- waves6710_15[, c(
  "PID3_w6", "PID3_w7", "PID3_w10_pre", "PID3_w11_pre", "PID3_w12",
  "PID3_w13", "PID3_w14_pre", "PID3_w14", "PID3_w15"
)]
all_waves_PID3[c("PID3_w6", "PID3_w7", "PID3_w12", "PID3_w13", "PID3_w14", "PID3_w15")] <-
  lapply(
    all_waves_PID3[c("PID3_w6", "PID3_w7", "PID3_w12", "PID3_w13", "PID3_w14", "PID3_w15")],
    as.character
  )

##Converting categorical values to numeric
#Two passes through the conversion helpers defined earlier in the script, in the original call order.
all_waves_PID7 <- convert_to_numeric(all_waves_PID7)
all_waves_PID3 <- convert_to_numeric(all_waves_PID3)

all_waves_PID7 <- convert_to_numeric_2(all_waves_PID7)
all_waves_PID3 <- convert_to_numeric_2(all_waves_PID3)

##Determining how many respondents have identical values throughout the nine-wave time series
#A respondent is flagged 1 when every wave score in their row is the same value, 0 otherwise.
#seq_len() keeps the loop safe when the filtered sample is empty, and the share is computed
#from the actual number of rows (the earlier version divided by a hard-coded 365).
identical_PID7 <- vapply(
  seq_len(nrow(all_waves_PID7)),
  function(row_index) {
    as.numeric(length(unique(as.list(all_waves_PID7[row_index, ]))) == 1)
  },
  numeric(1)
)

sum(identical_PID7) / nrow(all_waves_PID7) #0.466 with the 365 complete cases

identical_PID3 <- vapply(
  seq_len(nrow(all_waves_PID3)),
  function(row_index) {
    as.numeric(length(unique(as.list(all_waves_PID3[row_index, ]))) == 1)
  },
  numeric(1)
)

sum(identical_PID3) / nrow(all_waves_PID3) #0.723 with the 365 complete cases

##Appending MNO column
all_waves_PID7 <- cbind(waves6710_15$MNO, all_waves_PID7)
names(all_waves_PID7)[1] <- "MNO"

all_waves_PID3 <- cbind(waves6710_15$MNO, all_waves_PID3)
names(all_waves_PID3)[1] <- "MNO"

##Creating long form dataframes
#One row per respondent-wave; factor_key=TRUE keeps the wave columns' order as the factor levels of PID_wave.
library(tidyr)
all_waves_PID_7_long <- gather(all_waves_PID7, PID_wave, PID_7, PID7_w6:PID7_w15, factor_key = TRUE)
all_waves_PID_7_long <- all_waves_PID_7_long[order(all_waves_PID_7_long$MNO), ]

all_waves_PID_3_long <- gather(all_waves_PID3, PID_wave, PID_3, PID3_w6:PID3_w15, factor_key = TRUE)
all_waves_PID_3_long <- all_waves_PID_3_long[order(all_waves_PID_3_long$MNO), ]

##Adding wave columns
#The time variables are looked up from each row's own wave (the integer code of PID_wave) instead of
#repeating a fixed 0..8 pattern a hard-coded 365 times. The result is the same for the original sample,
#but no longer breaks when the number of respondents changes or the rows are not in strict wave order.
wave_years <- c(0, 0.167, 2.833, 3.833, 4.083, 6, 6.75, 7.25, 8)
wave_position <- as.integer(all_waves_PID_7_long$PID_wave)

#Both long frames come from the same respondents in the same order, so one set of time columns serves both.
stopifnot(identical(wave_position, as.integer(all_waves_PID_3_long$PID_wave)))

waves <- data.frame(
  wave_counter = wave_position - 1,
  years_since_origin = wave_years[wave_position]
)

all_waves_PID_7_long <- cbind(all_waves_PID_7_long, waves)
all_waves_PID_3_long <- cbind(all_waves_PID_3_long, waves)

##OLS Regressions
#Fits model_formula by OLS separately for every respondent (MNO) in long_df.
#Returns a list with
#  ids     - respondent IDs in order of first appearance,
#  slope   - the estimate in row 2 of each coefficient table (the time variable),
#  p_value - column 4 of that same row.
#summary.lm() may warn about an essentially perfect fit for respondents whose scores never change;
#their p-values can be NaN and are skipped in the counts below.
fit_individual_drift <- function(long_df, model_formula) {
  respondent_ids <- unique(long_df$MNO)
  coef_tables <- lapply(respondent_ids, function(id) {
    respondent_rows <- long_df[which(long_df$MNO == id), , drop = FALSE]
    summary(lm(model_formula, data = respondent_rows))$coefficients
  })
  list(
    ids = respondent_ids,
    slope = vapply(coef_tables, function(tab) tab[2, 1], numeric(1)),
    p_value = vapply(coef_tables, function(tab) tab[2, 4], numeric(1))
  )
}

#wave_counter
PID_7_fits <- fit_individual_drift(all_waves_PID_7_long, PID_7 ~ wave_counter)
PID_7_drift_coef <- PID_7_fits$slope
PID_7_drift_p_value <- PID_7_fits$p_value
MNO_set_PID_7 <- PID_7_fits$ids

PID_3_fits <- fit_individual_drift(all_waves_PID_3_long, PID_3 ~ wave_counter)
PID_3_drift_coef <- PID_3_fits$slope
PID_3_drift_p_value <- PID_3_fits$p_value
MNO_set_PID_3 <- PID_3_fits$ids

hist(PID_7_drift_coef, breaks=50)
summary(PID_7_drift_coef)
threshold_val <- 1/8 #This is the coefficient value required to drift one-full point over the full time-series.
#Count slopes whose magnitude reaches the threshold in EITHER direction. The earlier version evaluated
#abs(i>=threshold_val), i.e. abs() of the comparison, and therefore counted positive slopes only.
counter_threshold_PID7 <- sum(abs(PID_7_drift_coef) >= threshold_val, na.rm = TRUE)

counter_threshold_PID7/nrow(all_waves_PID7) #.123 was recorded with the earlier positive-only count

hist(PID_7_drift_p_value, breaks=50)
summary(PID_7_drift_p_value)
#Number of respondents whose slope is significant at the 5% level (NaN p-values are skipped)
counter_PID7 <- sum(PID_7_drift_p_value < 0.05, na.rm = TRUE)

hist(PID_3_drift_coef, breaks=50)
summary(PID_3_drift_coef)
counter_threshold_PID3 <- sum(abs(PID_3_drift_coef) >= threshold_val, na.rm = TRUE)

counter_threshold_PID3/nrow(all_waves_PID3) #.044 was recorded with the earlier positive-only count

hist(PID_3_drift_p_value, breaks=50)
summary(PID_3_drift_p_value)
counter_PID3 <- sum(PID_3_drift_p_value < 0.05, na.rm = TRUE)

#Counting respondents whose coefficients are exactly 0
#NOTE(review): this relies on exact floating-point equality of the fitted slope with 0.
sum(ifelse(PID_7_drift_coef==0,1,0))/nrow(all_waves_PID7) #0.466
sum(ifelse(PID_3_drift_coef==0,1,0))/nrow(all_waves_PID3) #0.726

#years_since_origin
#The results below overwrite the wave_counter results stored under the same variable names.
PID_7_fits <- fit_individual_drift(all_waves_PID_7_long, PID_7 ~ years_since_origin)
PID_7_drift_coef <- PID_7_fits$slope
PID_7_drift_p_value <- PID_7_fits$p_value
MNO_set_PID_7 <- PID_7_fits$ids

PID_3_fits <- fit_individual_drift(all_waves_PID_3_long, PID_3 ~ years_since_origin)
PID_3_drift_coef <- PID_3_fits$slope
PID_3_drift_p_value <- PID_3_fits$p_value
MNO_set_PID_3 <- PID_3_fits$ids

hist(PID_7_drift_coef, breaks=50)
summary(PID_7_drift_coef)
threshold_val <- 1/8 #This is the coefficient value required to drift one-full point over the full time-series.
counter_threshold_PID7 <- sum(abs(PID_7_drift_coef) >= threshold_val, na.rm = TRUE)

counter_threshold_PID7/nrow(all_waves_PID7) #.112 was recorded with the earlier positive-only count

hist(PID_7_drift_p_value, breaks=50)
summary(PID_7_drift_p_value)
counter_PID7 <- sum(PID_7_drift_p_value < 0.05, na.rm = TRUE)

hist(PID_3_drift_coef, breaks=50)
summary(PID_3_drift_coef)
#Uses threshold_val here as well (the earlier version repeated the literal 0.125)
counter_threshold_PID3 <- sum(abs(PID_3_drift_coef) >= threshold_val, na.rm = TRUE)

counter_threshold_PID3/nrow(all_waves_PID3) #.030 was recorded with the earlier positive-only count

hist(PID_3_drift_p_value, breaks=50)
summary(PID_3_drift_p_value)
counter_PID3 <- sum(PID_3_drift_p_value < 0.05, na.rm = TRUE)

#Counting respondents whose coefficients are exactly 0
sum(ifelse(PID_7_drift_coef==0,1,0))/nrow(all_waves_PID7) #0.466
sum(ifelse(PID_3_drift_coef==0,1,0))/nrow(all_waves_PID3) #0.723

#Visualizations for paper
library(ggplot2)
#Single-column frames holding the respondent-level slopes currently stored in the workspace
PID_7_drift_coef_df <- data.frame(PID_7_drift_coef = PID_7_drift_coef)
PID_3_drift_coef_df <- data.frame(PID_3_drift_coef = PID_3_drift_coef)

#Histogram of the PID7 slopes (bins of width 0.025, x-axis restricted to [-1, 1], empty title)
ggplot(data = PID_7_drift_coef_df, mapping = aes(x = PID_7_drift_coef)) +
  geom_histogram(binwidth = 0.025, color = "black") +
  labs(title = "", x = "OLS Coefficients (Slopes)", y = "Frequency") +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  ) +
  xlim(-1, 1)

#Saving in high resolution
#ggsave() is given no plot argument, so it writes the most recently displayed plot.
#NOTE(review): when the script is run top to bottom, PID_7_drift_coef holds the years_since_origin
#slopes at this point, while the file name says "Wave Count" -- confirm which version is intended.
ggsave(
  filename = "./ISCAP_Trajectories_Wave Count_Narrow Bins_090922.png",
  device = "png",
  width = 15.29,
  height = 5.7,
  units = "cm",
  dpi = 600
)



####Computing Implied Wiley-Wiley (1970) R^2s
#Calls wiley_r_squared() (defined elsewhere in this script) once for each index i = 2..7 and stores
#the six results, in that order, in ww1970_r_squares.
#NOTE(review): this chunk reuses all_waves_PID7 from the preceding chunk, where an MNO column was
#prepended as column 1. If wiley_r_squared() treats i as a column position, every index is shifted
#by one wave -- TODO confirm against the function's definition.
ww1970_r_squares = rep(NA,6) 
for (i in 2:7){
  ww1970_r_squares[i-1] = wiley_r_squared(all_waves_PID7,i)
}

#Display the vector of implied R^2 values
ww1970_r_squares



####Cross-Panel Analysis
###ISCAP VS. VSG
##Creating data set [COMPLETE CASES ONLY]
#Inner joins of waves 7, 12 and 13 on the respondent ID ("MNO" in waves 7/12, "mno" in wave 13)
waves7_12 <- merge(wave7, wave12, by = "MNO")
waves7_12_13 <- merge(waves7_12, wave13, by.x = "MNO", by.y = "mno")

#Filtering by rows
#Removing if PID7 value is not applicable
waves7_12_13 <- local({
  pid7_answers <- c(
    "Strong Democrat", "Not Strong Democrat", "Leans Democrat",
    "Undecided/Independent/Other",
    "Leans Republican", "Not Strong Republican", "Strong Republican"
  )
  panel <- waves7_12_13
  for (pid_column in c("PID7_w7", "PID7_w12", "PID7_w13")) {
    panel <- panel[panel[[pid_column]] %in% pid7_answers, ]
  }
  panel
})

#Filtering by columns
all_waves_PID7_ISCAP_vs_VSG <- waves7_12_13[, c("PID7_w7", "PID7_w12", "PID7_w13")]
all_waves_PID7_ISCAP_vs_VSG[["PID7_w13"]] <- as.character(all_waves_PID7_ISCAP_vs_VSG[["PID7_w13"]])

#Converting categorical values to numeric
#Both conversion helpers (defined earlier in the script) are applied in sequence.
all_waves_PID7_ISCAP_vs_VSG <- convert_to_numeric_2(
  convert_to_numeric(all_waves_PID7_ISCAP_vs_VSG)
)

##Computing statistics
#Correlations
#Pairwise correlations between the three wave scores, rounded to three decimals
cor_PID7_listwise = round(cor(all_waves_PID7_ISCAP_vs_VSG),3)

#The next two calls only produce logical masks; their results are not stored.
lower.tri(cor_PID7_listwise, diag = FALSE)
upper.tri(cor_PID7_listwise, diag = FALSE)
#Blank out the lower triangle and the diagonal, so that only the upper triangle remains
#(despite the "lower_" prefix of the variable name). Assigning "" turns the matrix into character.
lower_7_listwise<-cor_PID7_listwise
lower_7_listwise[lower.tri(cor_PID7_listwise, diag=TRUE)]=""
lower_7_listwise<-as.data.frame(lower_7_listwise)
lower_7_listwise
write.csv(lower_7_listwise, "./ISCAP_vs_VSG_PID7_correlations.csv")

#Means and SDs (Individuals)
#Row-wise mean and standard deviation across the three wave columns, stored as two new columns
all_waves_PID7_ISCAP_vs_VSG$Mean_PID7 = apply(all_waves_PID7_ISCAP_vs_VSG[,1:3], 1, mean)
all_waves_PID7_ISCAP_vs_VSG$SD_PID7 = apply(all_waves_PID7_ISCAP_vs_VSG[,1:3], 1, sd)

summary(all_waves_PID7_ISCAP_vs_VSG$Mean_PID7)
summary(all_waves_PID7_ISCAP_vs_VSG$SD_PID7)

#Means and SDs (Waves)
#Column-wise mean and SD; the output also includes the Mean_PID7 and SD_PID7 columns added just above.
as.data.frame(round(apply(all_waves_PID7_ISCAP_vs_VSG, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(all_waves_PID7_ISCAP_vs_VSG, 2, sd, na.rm=TRUE),2))

##Time Series and IV Regressions
#OLS Regressions
#Each wave's score regressed on the score from the preceding wave in this three-wave set
summary(lm(PID7_w12 ~ PID7_w7, data = all_waves_PID7_ISCAP_vs_VSG))
summary(lm(PID7_w13 ~ PID7_w12, data = all_waves_PID7_ISCAP_vs_VSG))

#IV Regressions
#Wave-13 score on wave-12 score, with the wave-7 score passed as the instrument (second formula)
library('ivreg')
summary(ivreg(PID7_w13 ~ PID7_w12, ~ PID7_w7, data = all_waves_PID7_ISCAP_vs_VSG))

###ISCAP VS. TAPS
##Creating data set [COMPLETE CASES ONLY]
#Inner joins of waves 6, 7, 10 and 12 on the respondent ID
waves67 <- merge(wave6, wave7, by = "MNO")
waves67_10 <- merge(waves67, wave10, by = "MNO")
waves67_10_12 <- merge(waves67_10, wave12, by = "MNO")

#Filtering by rows
waves67_10_12 <- local({
  pid7_answers <- c(
    "Strong Democrat", "Not Strong Democrat", "Leans Democrat",
    "Undecided/Independent/Other",
    "Leans Republican", "Not Strong Republican", "Strong Republican"
  )
  panel <- waves67_10_12

  #Removing if PID7 value is not applicable
  for (pid_column in c("PID7_w6", "PID7_w7", "PID7_w12", "PID7_w10_pre")) {
    panel <- panel[panel[[pid_column]] %in% pid7_answers, ]
  }

  #Removing if pre-survey partisanship measure is not within the defined date range
  #(the cut points could be extended to include more subjects)
  panel <- panel[panel$pppadate_w10 >= 20150728 & panel$pppadate_w10 <= 20150903, ]

  panel
})

##Filtering by columns
all_waves_PID7_ISCAP_vs_TAPS <- waves67_10_12[, c("PID7_w6", "PID7_w7", "PID7_w10_pre", "PID7_w12")]
all_waves_PID7_ISCAP_vs_TAPS[["PID7_w10_pre"]] <- as.character(all_waves_PID7_ISCAP_vs_TAPS[["PID7_w10_pre"]])

#Converting categorical values to numeric
#Both conversion helpers (defined earlier in the script) are applied in sequence.
all_waves_PID7_ISCAP_vs_TAPS <- convert_to_numeric_2(
  convert_to_numeric(all_waves_PID7_ISCAP_vs_TAPS)
)

##Computing statistics
#Correlations
#Pairwise correlations between the four wave scores, rounded to three decimals
cor_PID7_listwise = round(cor(all_waves_PID7_ISCAP_vs_TAPS),3)

#The next two calls only produce logical masks; their results are not stored.
lower.tri(cor_PID7_listwise, diag = FALSE)
upper.tri(cor_PID7_listwise, diag = FALSE)
#Blank out the lower triangle and the diagonal, so that only the upper triangle remains
#(despite the "lower_" prefix of the variable name). Assigning "" turns the matrix into character.
lower_7_listwise<-cor_PID7_listwise
lower_7_listwise[lower.tri(cor_PID7_listwise, diag=TRUE)]=""
lower_7_listwise<-as.data.frame(lower_7_listwise)
lower_7_listwise
write.csv(lower_7_listwise, "./ISCAP_vs_TAPS_PID7_correlations.csv")

#Means and SDs (Individuals)
#Row-wise mean and standard deviation across the four wave columns, stored as two new columns
all_waves_PID7_ISCAP_vs_TAPS$Mean_PID7 = apply(all_waves_PID7_ISCAP_vs_TAPS[,1:4], 1, mean)
all_waves_PID7_ISCAP_vs_TAPS$SD_PID7 = apply(all_waves_PID7_ISCAP_vs_TAPS[,1:4], 1, sd)

summary(all_waves_PID7_ISCAP_vs_TAPS$Mean_PID7)
summary(all_waves_PID7_ISCAP_vs_TAPS$SD_PID7)

#Means and SDs (Waves)
#Column-wise mean and SD; the output also includes the Mean_PID7 and SD_PID7 columns added just above.
as.data.frame(round(apply(all_waves_PID7_ISCAP_vs_TAPS, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(all_waves_PID7_ISCAP_vs_TAPS, 2, sd, na.rm=TRUE),2))

##Time Series and IV Regressions
#OLS Regressions
#Each wave's score regressed on the score from the preceding wave in this four-wave set
summary(lm(PID7_w7 ~ PID7_w6, data = all_waves_PID7_ISCAP_vs_TAPS))
summary(lm(PID7_w10_pre ~ PID7_w7, data = all_waves_PID7_ISCAP_vs_TAPS))
summary(lm(PID7_w12 ~ PID7_w10_pre, data = all_waves_PID7_ISCAP_vs_TAPS))

#IV Regressions
#Same lagged regressions, with the score from two waves back passed as the instrument (second formula)
library('ivreg')
summary(ivreg(PID7_w10_pre ~ PID7_w7, ~ PID7_w6, data = all_waves_PID7_ISCAP_vs_TAPS))
summary(ivreg(PID7_w12 ~ PID7_w10_pre, ~ PID7_w7, data = all_waves_PID7_ISCAP_vs_TAPS))

#Computing Implied Wiley-Wiley (1970) R^2s
#NOTE(review): at this point the data frame also contains the Mean_PID7 and SD_PID7 columns added above;
#confirm that wiley_r_squared() (defined elsewhere in this script) only reads the wave columns.
wiley_r_squared(all_waves_PID7_ISCAP_vs_TAPS,2)



####Six Presentation Waves
###Creating data set [COMPLETE CASES ONLY]
##INNER JOIN
#Successive inner joins on the respondent ID; the key is called "MNO" in waves 6-12 and "mno" in waves 13-15.
waves67 <- merge(wave6, wave7, by = "MNO")
waves6710 <- merge(waves67, wave10, by = "MNO")
waves6710_11 <- merge(waves6710, wave11, by = "MNO")
waves6710_12 <- merge(waves6710_11, wave12, by = "MNO")
waves6710_13 <- merge(waves6710_12, wave13, by.x = "MNO", by.y = "mno")
waves6710_14 <- merge(waves6710_13, wave14, by.x = "MNO", by.y = "mno")
waves6710_15 <- merge(waves6710_14, wave15, by.x = "MNO", by.y = "mno")

##Filtering by rows
#The same nine-measure complete-case filters as in the drift analysis are applied here,
#even though only six measures are kept afterwards.
waves6710_15 <- local({
  pid3_answers <- c("Democrat", "Independent", "Republican")
  pid7_answers <- c(
    "Strong Democrat", "Not Strong Democrat", "Leans Democrat",
    "Undecided/Independent/Other",
    "Leans Republican", "Not Strong Republican", "Strong Republican"
  )
  pid3_columns <- c(
    "PID3_w6", "PID3_w7", "PID3_w12", "PID3_w14", "PID3_w15",
    "PID3_w10_pre", "PID3_w11_pre", "PID3_w13", "PID3_w14_pre"
  )
  pid7_columns <- c(
    "PID7_w6", "PID7_w7", "PID7_w12", "PID7_w14", "PID7_w15",
    "PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre"
  )

  panel <- waves6710_15

  #Removing if PID3 value is not applicable
  for (pid_column in pid3_columns) {
    panel <- panel[panel[[pid_column]] %in% pid3_answers, ]
  }

  #Removing if PID7 value is not applicable
  for (pid_column in pid7_columns) {
    panel <- panel[panel[[pid_column]] %in% pid7_answers, ]
  }

  #Removing if pre-survey partisanship measure is not within the defined date range
  #(the cut points could be extended to include more subjects)
  panel <- panel[panel$pppadate_w10 >= 20150728 & panel$pppadate_w10 <= 20150903, ]
  panel <- panel[panel$pppadate_w11 >= 20160715 & panel$pppadate_w11 <= 20160827, ]
  panel <- panel[panel$pppadate_w14 >= 20190611 & panel$pppadate_w14 <= 20190820, ]

  panel
})

##Filtering by columns
#Keep the six presentation measures, in wave order
all_waves_PID7 <- waves6710_15[, c(
  "PID7_w6", "PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre", "PID7_w15"
)]
all_waves_PID7[c("PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")] <-
  lapply(
    all_waves_PID7[c("PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")],
    as.character
  )

##Converting categorical values to numeric
#Both conversion helpers (defined earlier in the script) are applied in sequence.
all_waves_PID7 <- convert_to_numeric_2(convert_to_numeric(all_waves_PID7))

##Time Series and IV Regressions
#OLS Regressions
#Each measure regressed on the immediately preceding measure in the six-wave sequence
summary(lm(PID7_w10_pre ~ PID7_w6, data = all_waves_PID7))
summary(lm(PID7_w11_pre ~ PID7_w10_pre, data = all_waves_PID7))
summary(lm(PID7_w13 ~ PID7_w11_pre, data = all_waves_PID7))
summary(lm(PID7_w14_pre ~ PID7_w13, data = all_waves_PID7))
summary(lm(PID7_w15 ~ PID7_w14_pre, data = all_waves_PID7))

#IV Regressions
#Same lagged regressions, with the measure from two steps back passed as the instrument (second formula)
library('ivreg')
summary(ivreg(PID7_w11_pre ~ PID7_w10_pre, ~ PID7_w6, data = all_waves_PID7))
summary(ivreg(PID7_w13 ~ PID7_w11_pre, ~ PID7_w10_pre, data = all_waves_PID7))
summary(ivreg(PID7_w14_pre ~ PID7_w13, ~ PID7_w11_pre, data = all_waves_PID7))
summary(ivreg(PID7_w15 ~ PID7_w14_pre, ~ PID7_w13, data = all_waves_PID7))

#Computing Implied Wiley-Wiley (1970) R^2s
#Calls wiley_r_squared() (defined elsewhere in this script) for i = 2..4 and stores the three results in order.
ww1970_r_squares = rep(NA,3) 
for (i in 2:4){
  ww1970_r_squares[i-1] = wiley_r_squared(all_waves_PID7,i)
}

#Display the vector of implied R^2 values
ww1970_r_squares



####Six Presentation Waves [INCLUDING WEIGHTS]
###Creating data set [COMPLETE CASES ONLY]
##INNER JOIN
#The join order differs from the other chunks (6, 7, 10, 11, 13, 15, then 12, 14). Keep it unchanged:
#presumably it determines which suffix the duplicated weight columns receive -- see the weights note below.
waves67 <- merge(wave6, wave7, by = "MNO")
waves6710 <- merge(waves67, wave10, by = "MNO")
waves6710_11 <- merge(waves6710, wave11, by = "MNO")
waves6710_13 <- merge(waves6710_11, wave13, by.x = "MNO", by.y = "mno")
waves6710_15 <- merge(waves6710_13, wave15, by.x = "MNO", by.y = "mno")
waves6710_12 <- merge(waves6710_15, wave12, by = "MNO")
waves6710_14 <- merge(waves6710_12, wave14, by.x = "MNO", by.y = "mno")
waves6710_15 <- waves6710_14

##Filtering by rows
waves6710_15 <- local({
  pid7_answers <- c(
    "Strong Democrat", "Not Strong Democrat", "Leans Democrat",
    "Undecided/Independent/Other",
    "Leans Republican", "Not Strong Republican", "Strong Republican"
  )
  pid7_columns <- c(
    "PID7_w6", "PID7_w7", "PID7_w12", "PID7_w14", "PID7_w15",
    "PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre"
  )

  panel <- waves6710_15

  #Removing if PID7 value is not applicable
  for (pid_column in pid7_columns) {
    panel <- panel[panel[[pid_column]] %in% pid7_answers, ]
  }

  #Removing if pre-survey partisanship measure is not within the defined date range
  #(the cut points could be extended to include more subjects)
  panel <- panel[panel$pppadate_w10 >= 20150728 & panel$pppadate_w10 <= 20150903, ]
  panel <- panel[panel$pppadate_w11 >= 20160715 & panel$pppadate_w11 <= 20160827, ]
  panel <- panel[panel$pppadate_w14 >= 20190611 & panel$pppadate_w14 <= 20190820, ]

  panel
})


##Filtering by columns
#Keep the six presentation measures, in wave order
all_waves_PID7 <- waves6710_15[, c(
  "PID7_w6", "PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre", "PID7_w15"
)]
all_waves_PID7[c("PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")] <-
  lapply(
    all_waves_PID7[c("PID7_w10_pre", "PID7_w11_pre", "PID7_w13", "PID7_w14_pre")],
    as.character
  )

##Converting categorical values to numeric
#Both conversion helpers (defined earlier in the script) are applied in sequence.
all_waves_PID7 <- convert_to_numeric_2(convert_to_numeric(all_waves_PID7))

##Appending weights
#weight_pre is wave6, weight is wave10, weight1.x is wave11, weight1.y is wave13, and weight1 is wave14.
weights_PID7 <- waves6710_15[, c("weight_pre", "weight", "weight1", "weight1.x", "weight1.y")]
all_waves_PID7 <- cbind(all_waves_PID7, weights_PID7)

##IV Regressions
#Each IV model is estimated twice: first unweighted, then with a survey weight column from all_waves_PID7.
#In every pair the weight column is the one that the note in the weights step above attributes to the
#wave of the right-hand-side measure (weight -> wave 10, weight1.x -> wave 11, weight1.y -> wave 13, weight1 -> wave 14).
#weight_pre is appended to the data but not used in any model here.
library('ivreg')
summary(ivreg(PID7_w11_pre ~ PID7_w10_pre, ~ PID7_w6, data = all_waves_PID7))
summary(ivreg(PID7_w11_pre ~ PID7_w10_pre, ~ PID7_w6, data = all_waves_PID7, weights = weight))

summary(ivreg(PID7_w13 ~ PID7_w11_pre, ~ PID7_w10_pre, data = all_waves_PID7))
summary(ivreg(PID7_w13 ~ PID7_w11_pre, ~ PID7_w10_pre, data = all_waves_PID7, weights = weight1.x))

summary(ivreg(PID7_w14_pre ~ PID7_w13, ~ PID7_w11_pre, data = all_waves_PID7))
summary(ivreg(PID7_w14_pre ~ PID7_w13, ~ PID7_w11_pre, data = all_waves_PID7, weights = weight1.y))

summary(ivreg(PID7_w15 ~ PID7_w14_pre, ~ PID7_w13, data = all_waves_PID7))
summary(ivreg(PID7_w15 ~ PID7_w14_pre, ~ PID7_w13, data = all_waves_PID7, weights = weight1))